OSDN Git Service

c66e5a588ae3c0c0e3927fceca093a4944c05332
[android-x86/external-swiftshader.git] / src / Reactor / LLVMReactor.cpp
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "Nucleus.hpp"
16
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
31
32 #include "LLVMRoutine.hpp"
33 #include "LLVMRoutineManager.hpp"
34 #include "x86.hpp"
35 #include "CPUID.hpp"
36 #include "Thread.hpp"
37 #include "Memory.hpp"
38 #include "MutexLock.hpp"
39
40 #include <fstream>
41
42 #if defined(__i386__) || defined(__x86_64__)
43 #include <xmmintrin.h>
44 #endif
45
46 #if defined(__x86_64__) && defined(_WIN32)
extern "C"
{
        // Placeholder definition that is only compiled for 64-bit Windows builds
        // (see the enclosing #if). It is not implemented and asserts if ever called.
        void X86CompilationCallback()
        {
                assert(false);   // UNIMPLEMENTED
        }
}
51 #endif
52
// Entry points of the optional CodeAnalyst JIT notification library.
// They stay null until Nucleus's constructor resolves them at run time.
extern "C"
{
        bool (*CodeAnalystInitialize)() = nullptr;
        void (*CodeAnalystCompleteJITLog)() = nullptr;
        bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = nullptr;
}
59
// Declaration of an LLVM global flag that this file writes to in Nucleus's constructor.
namespace llvm { extern bool JITEmitDebugInfo; }
64
65 namespace
66 {
67         sw::LLVMRoutineManager *routineManager = nullptr;
68         llvm::ExecutionEngine *executionEngine = nullptr;
69         llvm::IRBuilder<> *builder = nullptr;
70         llvm::LLVMContext *context = nullptr;
71         llvm::Module *module = nullptr;
72         llvm::Function *function = nullptr;
73
74         sw::MutexLock codegenMutex;
75 }
76
77 namespace sw
78 {
79         Optimization optimization[10] = {InstructionCombining, Disabled};
80
81         enum EmulatedType
82         {
83                 Type_v2i32,
84                 Type_v4i16,
85                 Type_v2i16,
86                 Type_v8i8,
87                 Type_v4i8,
88                 Type_v2f32,
89                 EmulatedTypeCount
90         };
91
92         class Value : public llvm::Value {};
93         class SwitchCases : public llvm::SwitchInst {};
94         class BasicBlock : public llvm::BasicBlock {};
95
96         llvm::Type *T(Type *t)
97         {
98                 uintptr_t type = reinterpret_cast<uintptr_t>(t);
99                 if(type < EmulatedTypeCount)
100                 {
101                         // Use 128-bit vectors to implement logically shorter ones.
102                         switch(type)
103                         {
104                         case Type_v2i32: return T(Int4::getType());
105                         case Type_v4i16: return T(Short8::getType());
106                         case Type_v2i16: return T(Short8::getType());
107                         case Type_v8i8:  return T(Byte16::getType());
108                         case Type_v4i8:  return T(Byte16::getType());
109                         case Type_v2f32: return T(Float4::getType());
110                         default: assert(false);
111                         }
112                 }
113
114                 return reinterpret_cast<llvm::Type*>(t);
115         }
116
117         inline Type *T(llvm::Type *t)
118         {
119                 return reinterpret_cast<Type*>(t);
120         }
121
122         Type *T(EmulatedType t)
123         {
124                 return reinterpret_cast<Type*>(t);
125         }
126
127         inline Value *V(llvm::Value *t)
128         {
129                 return reinterpret_cast<Value*>(t);
130         }
131
132         inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
133         {
134                 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
135         }
136
137         inline BasicBlock *B(llvm::BasicBlock *t)
138         {
139                 return reinterpret_cast<BasicBlock*>(t);
140         }
141
142         static size_t typeSize(Type *type)
143         {
144                 uintptr_t t = reinterpret_cast<uintptr_t>(type);
145                 if(t < EmulatedTypeCount)
146                 {
147                         switch(t)
148                         {
149                         case Type_v2i32: return 8;
150                         case Type_v4i16: return 8;
151                         case Type_v2i16: return 4;
152                         case Type_v8i8:  return 8;
153                         case Type_v4i8:  return 4;
154                         case Type_v2f32: return 8;
155                         default: assert(false);
156                         }
157                 }
158
159                 return T(type)->getPrimitiveSizeInBits() / 8;
160         }
161
162         Nucleus::Nucleus()
163         {
164                 ::codegenMutex.lock();   // Reactor and LLVM are currently not thread safe
165
166                 llvm::InitializeNativeTarget();
167                 llvm::JITEmitDebugInfo = false;
168
169                 if(!::context)
170                 {
171                         ::context = new llvm::LLVMContext();
172                 }
173
174                 ::module = new llvm::Module("", *::context);
175                 ::routineManager = new LLVMRoutineManager();
176
177                 #if defined(__x86_64__)
178                         const char *architecture = "x86-64";
179                 #else
180                         const char *architecture = "x86";
181                 #endif
182
183                 llvm::SmallVector<std::string, 1> MAttrs;
184                 MAttrs.push_back(CPUID::supportsMMX()    ? "+mmx"   : "-mmx");
185                 MAttrs.push_back(CPUID::supportsCMOV()   ? "+cmov"  : "-cmov");
186                 MAttrs.push_back(CPUID::supportsSSE()    ? "+sse"   : "-sse");
187                 MAttrs.push_back(CPUID::supportsSSE2()   ? "+sse2"  : "-sse2");
188                 MAttrs.push_back(CPUID::supportsSSE3()   ? "+sse3"  : "-sse3");
189                 MAttrs.push_back(CPUID::supportsSSSE3()  ? "+ssse3" : "-ssse3");
190                 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
191
192                 std::string error;
193                 llvm::TargetMachine *targetMachine = llvm::EngineBuilder::selectTarget(::module, architecture, "", MAttrs, llvm::Reloc::Default, llvm::CodeModel::JITDefault, &error);
194                 ::executionEngine = llvm::JIT::createJIT(::module, 0, ::routineManager, llvm::CodeGenOpt::Aggressive, true, targetMachine);
195
196                 if(!::builder)
197                 {
198                         ::builder = new llvm::IRBuilder<>(*::context);
199
200                         #if defined(_WIN32)
201                                 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
202                                 if(CodeAnalyst)
203                                 {
204                                         CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
205                                         CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
206                                         CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
207
208                                         CodeAnalystInitialize();
209                                 }
210                         #endif
211                 }
212         }
213
214         Nucleus::~Nucleus()
215         {
216                 delete ::executionEngine;
217                 ::executionEngine = nullptr;
218
219                 ::routineManager = nullptr;
220                 ::function = nullptr;
221                 ::module = nullptr;
222
223                 ::codegenMutex.unlock();
224         }
225
226         Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
227         {
228                 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
229                 {
230                         llvm::Type *type = ::function->getReturnType();
231
232                         if(type->isVoidTy())
233                         {
234                                 createRetVoid();
235                         }
236                         else
237                         {
238                                 createRet(V(llvm::UndefValue::get(type)));
239                         }
240                 }
241
242                 if(false)
243                 {
244                         std::string error;
245                         llvm::raw_fd_ostream file("llvm-dump-unopt.txt", error);
246                         ::module->print(file, 0);
247                 }
248
249                 if(runOptimizations)
250                 {
251                         optimize();
252                 }
253
254                 if(false)
255                 {
256                         std::string error;
257                         llvm::raw_fd_ostream file("llvm-dump-opt.txt", error);
258                         ::module->print(file, 0);
259                 }
260
261                 void *entry = ::executionEngine->getPointerToFunction(::function);
262                 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
263
264                 if(CodeAnalystLogJITCode)
265                 {
266                         CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
267                 }
268
269                 return routine;
270         }
271
272         void Nucleus::optimize()
273         {
274                 static llvm::PassManager *passManager = nullptr;
275
276                 if(!passManager)
277                 {
278                         passManager = new llvm::PassManager();
279
280                         llvm::UnsafeFPMath = true;
281                 //      llvm::NoInfsFPMath = true;
282                 //      llvm::NoNaNsFPMath = true;
283
284                         passManager->add(new llvm::TargetData(*::executionEngine->getTargetData()));
285                         passManager->add(llvm::createScalarReplAggregatesPass());
286
287                         for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
288                         {
289                                 switch(optimization[pass])
290                                 {
291                                 case Disabled:                                                                       break;
292                                 case CFGSimplification:    passManager->add(llvm::createCFGSimplificationPass());    break;
293                                 case LICM:                 passManager->add(llvm::createLICMPass());                 break;
294                                 case AggressiveDCE:        passManager->add(llvm::createAggressiveDCEPass());        break;
295                                 case GVN:                  passManager->add(llvm::createGVNPass());                  break;
296                                 case InstructionCombining: passManager->add(llvm::createInstructionCombiningPass()); break;
297                                 case Reassociate:          passManager->add(llvm::createReassociatePass());          break;
298                                 case DeadStoreElimination: passManager->add(llvm::createDeadStoreEliminationPass()); break;
299                                 case SCCP:                 passManager->add(llvm::createSCCPPass());                 break;
300                                 case ScalarReplAggregates: passManager->add(llvm::createScalarReplAggregatesPass()); break;
301                                 default:
302                                         assert(false);
303                                 }
304                         }
305                 }
306
307                 passManager->run(*::module);
308         }
309
310         Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
311         {
312                 // Need to allocate it in the entry block for mem2reg to work
313                 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
314
315                 llvm::Instruction *declaration;
316
317                 if(arraySize)
318                 {
319                         declaration = new llvm::AllocaInst(T(type), Nucleus::createConstantInt(arraySize));
320                 }
321                 else
322                 {
323                         declaration = new llvm::AllocaInst(T(type), (Value*)nullptr);
324                 }
325
326                 entryBlock.getInstList().push_front(declaration);
327
328                 return V(declaration);
329         }
330
331         BasicBlock *Nucleus::createBasicBlock()
332         {
333                 return B(BasicBlock::Create(*::context, "", ::function));
334         }
335
336         BasicBlock *Nucleus::getInsertBlock()
337         {
338                 return B(::builder->GetInsertBlock());
339         }
340
341         void Nucleus::setInsertBlock(BasicBlock *basicBlock)
342         {
343         //      assert(::builder->GetInsertBlock()->back().isTerminator());
344                 return ::builder->SetInsertPoint(basicBlock);
345         }
346
347         void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
348         {
349                 llvm::FunctionType *functionType = llvm::FunctionType::get(T(ReturnType), T(Params), false);
350                 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
351                 ::function->setCallingConv(llvm::CallingConv::C);
352
353                 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
354         }
355
356         Value *Nucleus::getArgument(unsigned int index)
357         {
358                 llvm::Function::arg_iterator args = ::function->arg_begin();
359
360                 while(index)
361                 {
362                         args++;
363                         index--;
364                 }
365
366                 return V(&*args);
367         }
368
369         void Nucleus::createRetVoid()
370         {
371                 ::builder->CreateRetVoid();
372         }
373
374         void Nucleus::createRet(Value *v)
375         {
376                 ::builder->CreateRet(v);
377         }
378
379         void Nucleus::createBr(BasicBlock *dest)
380         {
381                 ::builder->CreateBr(dest);
382         }
383
384         void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
385         {
386                 ::builder->CreateCondBr(cond, ifTrue, ifFalse);
387         }
388
389         Value *Nucleus::createAdd(Value *lhs, Value *rhs)
390         {
391                 return V(::builder->CreateAdd(lhs, rhs));
392         }
393
394         Value *Nucleus::createSub(Value *lhs, Value *rhs)
395         {
396                 return V(::builder->CreateSub(lhs, rhs));
397         }
398
399         Value *Nucleus::createMul(Value *lhs, Value *rhs)
400         {
401                 return V(::builder->CreateMul(lhs, rhs));
402         }
403
404         Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
405         {
406                 return V(::builder->CreateUDiv(lhs, rhs));
407         }
408
409         Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
410         {
411                 return V(::builder->CreateSDiv(lhs, rhs));
412         }
413
414         Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
415         {
416                 return V(::builder->CreateFAdd(lhs, rhs));
417         }
418
419         Value *Nucleus::createFSub(Value *lhs, Value *rhs)
420         {
421                 return V(::builder->CreateFSub(lhs, rhs));
422         }
423
424         Value *Nucleus::createFMul(Value *lhs, Value *rhs)
425         {
426                 return V(::builder->CreateFMul(lhs, rhs));
427         }
428
429         Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
430         {
431                 return V(::builder->CreateFDiv(lhs, rhs));
432         }
433
434         Value *Nucleus::createURem(Value *lhs, Value *rhs)
435         {
436                 return V(::builder->CreateURem(lhs, rhs));
437         }
438
439         Value *Nucleus::createSRem(Value *lhs, Value *rhs)
440         {
441                 return V(::builder->CreateSRem(lhs, rhs));
442         }
443
444         Value *Nucleus::createFRem(Value *lhs, Value *rhs)
445         {
446                 return V(::builder->CreateFRem(lhs, rhs));
447         }
448
449         Value *Nucleus::createShl(Value *lhs, Value *rhs)
450         {
451                 return V(::builder->CreateShl(lhs, rhs));
452         }
453
454         Value *Nucleus::createLShr(Value *lhs, Value *rhs)
455         {
456                 return V(::builder->CreateLShr(lhs, rhs));
457         }
458
459         Value *Nucleus::createAShr(Value *lhs, Value *rhs)
460         {
461                 return V(::builder->CreateAShr(lhs, rhs));
462         }
463
464         Value *Nucleus::createAnd(Value *lhs, Value *rhs)
465         {
466                 return V(::builder->CreateAnd(lhs, rhs));
467         }
468
469         Value *Nucleus::createOr(Value *lhs, Value *rhs)
470         {
471                 return V(::builder->CreateOr(lhs, rhs));
472         }
473
474         Value *Nucleus::createXor(Value *lhs, Value *rhs)
475         {
476                 return V(::builder->CreateXor(lhs, rhs));
477         }
478
479         Value *Nucleus::createNeg(Value *v)
480         {
481                 return V(::builder->CreateNeg(v));
482         }
483
484         Value *Nucleus::createFNeg(Value *v)
485         {
486                 return V(::builder->CreateFNeg(v));
487         }
488
489         Value *Nucleus::createNot(Value *v)
490         {
491                 return V(::builder->CreateNot(v));
492         }
493
494         Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
495         {
496                 uintptr_t t = reinterpret_cast<uintptr_t>(type);
497                 if(t < EmulatedTypeCount)
498                 {
499                         switch(t)
500                         {
501                         case Type_v2i32:
502                         case Type_v4i16:
503                         case Type_v8i8:
504                         case Type_v2f32:
505                                 return createBitCast(createInsertElement(V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2))), createLoad(createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment), 0), T(T(type)));
506                         case Type_v2i16:
507                         case Type_v4i8:
508                                 if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
509                                 {
510                                         Value *u = V(llvm::UndefValue::get(llvm::VectorType::get(T(Long::getType()), 2)));
511                                         Value *i = V(createLoad(createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment));
512                                         i = createZExt(i, Long::getType());
513                                         Value *v = V(createInsertElement(u, i, 0));
514                                         return createBitCast(v, T(T(type)));
515                                 }
516                                 break;
517                         default:
518                                 assert(false);
519                         }
520                 }
521
522                 assert(ptr->getType()->getContainedType(0) == T(type));
523                 return V(::builder->Insert(new llvm::LoadInst(ptr, "", isVolatile, alignment)));
524         }
525
526         Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int alignment)
527         {
528                 uintptr_t t = reinterpret_cast<uintptr_t>(type);
529                 if(t < EmulatedTypeCount)
530                 {
531                         switch(t)
532                         {
533                         case Type_v2i32:
534                         case Type_v4i16:
535                         case Type_v8i8:
536                         case Type_v2f32:
537                                 createStore(createExtractElement(createBitCast(value, T(llvm::VectorType::get(T(Long::getType()), 2))), Long::getType(), 0), createBitCast(ptr, Pointer<Long>::getType()), Long::getType(), isVolatile, alignment);
538                                 return value;
539                         case Type_v2i16:
540                         case Type_v4i8:
541                                 if(alignment != 0)   // Not a local variable (all vectors are 128-bit).
542                                 {
543                                         createStore(createExtractElement(createBitCast(value, Int4::getType()), Int::getType(), 0), createBitCast(ptr, Pointer<Int>::getType()), Int::getType(), isVolatile, alignment);
544                                         return value;
545                                 }
546                                 break;
547                         default:
548                                 assert(false);
549                         }
550                 }
551
552                 assert(ptr->getType()->getContainedType(0) == T(type));
553                 ::builder->Insert(new llvm::StoreInst(value, ptr, isVolatile, alignment));
554                 return value;
555         }
556
557         Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index, bool unsignedIndex)
558         {
559                 if(sizeof(void*) == 8)
560                 {
561                         if(unsignedIndex)
562                         {
563                                 index = createZExt(index, Long::getType());
564                         }
565                         else
566                         {
567                                 index = createSExt(index, Long::getType());
568                         }
569
570                         index = createMul(index, createConstantLong((int64_t)typeSize(type)));
571                 }
572                 else
573                 {
574                         index = createMul(index, createConstantInt((int)typeSize(type)));
575                 }
576
577                 assert(ptr->getType()->getContainedType(0) == T(type));
578                 return createBitCast(V(::builder->CreateGEP(createBitCast(ptr, T(llvm::PointerType::get(T(Byte::getType()), 0))), index)), T(llvm::PointerType::get(T(type), 0)));
579         }
580
581         Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
582         {
583                 return V(::builder->CreateAtomicRMW(llvm::AtomicRMWInst::Add, ptr, value, llvm::SequentiallyConsistent));
584         }
585
586         Value *Nucleus::createTrunc(Value *v, Type *destType)
587         {
588                 return V(::builder->CreateTrunc(v, T(destType)));
589         }
590
591         Value *Nucleus::createZExt(Value *v, Type *destType)
592         {
593                 return V(::builder->CreateZExt(v, T(destType)));
594         }
595
596         Value *Nucleus::createSExt(Value *v, Type *destType)
597         {
598                 return V(::builder->CreateSExt(v, T(destType)));
599         }
600
601         Value *Nucleus::createFPToSI(Value *v, Type *destType)
602         {
603                 return V(::builder->CreateFPToSI(v, T(destType)));
604         }
605
606         Value *Nucleus::createSIToFP(Value *v, Type *destType)
607         {
608                 return V(::builder->CreateSIToFP(v, T(destType)));
609         }
610
611         Value *Nucleus::createFPTrunc(Value *v, Type *destType)
612         {
613                 return V(::builder->CreateFPTrunc(v, T(destType)));
614         }
615
616         Value *Nucleus::createFPExt(Value *v, Type *destType)
617         {
618                 return V(::builder->CreateFPExt(v, T(destType)));
619         }
620
621         Value *Nucleus::createBitCast(Value *v, Type *destType)
622         {
623                 // Bitcasts must be between types of the same logical size. But with emulated narrow vectors we need
624                 // support for casting between scalars and wide vectors. Emulate them by writing to the stack and
625                 // reading back as the destination type.
626                 if(!v->getType()->isVectorTy() && T(destType)->isVectorTy())
627                 {
628                         Value *readAddress = allocateStackVariable(destType);
629                         Value *writeAddress = createBitCast(readAddress, T(llvm::PointerType::get(v->getType(), 0)));
630                         createStore(v, writeAddress, T(v->getType()));
631                         return createLoad(readAddress, destType);
632                 }
633                 else if(v->getType()->isVectorTy() && !T(destType)->isVectorTy())
634                 {
635                         Value *writeAddress = allocateStackVariable(T(v->getType()));
636                         createStore(v, writeAddress, T(v->getType()));
637                         Value *readAddress = createBitCast(writeAddress, T(llvm::PointerType::get(T(destType), 0)));
638                         return createLoad(readAddress, destType);
639                 }
640
641                 return V(::builder->CreateBitCast(v, T(destType)));
642         }
643
644         Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
645         {
646                 return V(::builder->CreateICmpEQ(lhs, rhs));
647         }
648
649         Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
650         {
651                 return V(::builder->CreateICmpNE(lhs, rhs));
652         }
653
654         Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
655         {
656                 return V(::builder->CreateICmpUGT(lhs, rhs));
657         }
658
659         Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
660         {
661                 return V(::builder->CreateICmpUGE(lhs, rhs));
662         }
663
664         Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
665         {
666                 return V(::builder->CreateICmpULT(lhs, rhs));
667         }
668
669         Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
670         {
671                 return V(::builder->CreateICmpULE(lhs, rhs));
672         }
673
674         Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
675         {
676                 return V(::builder->CreateICmpSGT(lhs, rhs));
677         }
678
679         Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
680         {
681                 return V(::builder->CreateICmpSGE(lhs, rhs));
682         }
683
684         Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
685         {
686                 return V(::builder->CreateICmpSLT(lhs, rhs));
687         }
688
689         Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
690         {
691                 return V(::builder->CreateICmpSLE(lhs, rhs));
692         }
693
694         Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
695         {
696                 return V(::builder->CreateFCmpOEQ(lhs, rhs));
697         }
698
699         Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
700         {
701                 return V(::builder->CreateFCmpOGT(lhs, rhs));
702         }
703
704         Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
705         {
706                 return V(::builder->CreateFCmpOGE(lhs, rhs));
707         }
708
709         Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
710         {
711                 return V(::builder->CreateFCmpOLT(lhs, rhs));
712         }
713
714         Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
715         {
716                 return V(::builder->CreateFCmpOLE(lhs, rhs));
717         }
718
719         Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
720         {
721                 return V(::builder->CreateFCmpONE(lhs, rhs));
722         }
723
724         Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
725         {
726                 return V(::builder->CreateFCmpORD(lhs, rhs));
727         }
728
729         Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
730         {
731                 return V(::builder->CreateFCmpUNO(lhs, rhs));
732         }
733
734         Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
735         {
736                 return V(::builder->CreateFCmpUEQ(lhs, rhs));
737         }
738
739         Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
740         {
741                 return V(::builder->CreateFCmpUGT(lhs, rhs));
742         }
743
744         Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
745         {
746                 return V(::builder->CreateFCmpUGE(lhs, rhs));
747         }
748
749         Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
750         {
751                 return V(::builder->CreateFCmpULT(lhs, rhs));
752         }
753
754         Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
755         {
756                 return V(::builder->CreateFCmpULE(lhs, rhs));
757         }
758
759         Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
760         {
761                 return V(::builder->CreateFCmpULE(lhs, rhs));
762         }
763
764         Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
765         {
766                 assert(vector->getType()->getContainedType(0) == T(type));
767                 return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
768         }
769
770         Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
771         {
772                 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
773         }
774
775         Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
776         {
777                 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
778                 const int maxSize = 16;
779                 llvm::Constant *swizzle[maxSize];
780                 assert(size <= maxSize);
781
782                 for(int i = 0; i < size; i++)
783                 {
784                         swizzle[i] = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), select[i]);
785                 }
786
787                 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
788
789                 return V(::builder->CreateShuffleVector(V1, V2, shuffle));
790         }
791
792         Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
793         {
794                 return V(::builder->CreateSelect(C, ifTrue, ifFalse));
795         }
796
797         SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
798         {
799                 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases));
800         }
801
802         void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
803         {
804                 switchCases->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), label, true), branch);
805         }
806
807         void Nucleus::createUnreachable()
808         {
809                 ::builder->CreateUnreachable();
810         }
811
812         static Value *createSwizzle4(Value *val, unsigned char select)
813         {
814                 int swizzle[4] =
815                 {
816                         (select >> 0) & 0x03,
817                         (select >> 2) & 0x03,
818                         (select >> 4) & 0x03,
819                         (select >> 6) & 0x03,
820                 };
821
822                 return Nucleus::createShuffleVector(val, val, swizzle);
823         }
824
825         static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
826         {
827                 bool mask[4] = {false, false, false, false};
828
829                 mask[(select >> 0) & 0x03] = true;
830                 mask[(select >> 2) & 0x03] = true;
831                 mask[(select >> 4) & 0x03] = true;
832                 mask[(select >> 6) & 0x03] = true;
833
834                 int swizzle[4] =
835                 {
836                         mask[0] ? 4 : 0,
837                         mask[1] ? 5 : 1,
838                         mask[2] ? 6 : 2,
839                         mask[3] ? 7 : 3,
840                 };
841
842                 return Nucleus::createShuffleVector(lhs, rhs, swizzle);
843         }
844
845         Type *Nucleus::getPointerType(Type *ElementType)
846         {
847                 return T(llvm::PointerType::get(T(ElementType), 0));
848         }
849
850         Value *Nucleus::createNullValue(Type *Ty)
851         {
852                 return V(llvm::Constant::getNullValue(T(Ty)));
853         }
854
855         Value *Nucleus::createConstantLong(int64_t i)
856         {
857                 return V(llvm::ConstantInt::get(llvm::Type::getInt64Ty(*::context), i, true));
858         }
859
860         Value *Nucleus::createConstantInt(int i)
861         {
862                 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, true));
863         }
864
865         Value *Nucleus::createConstantInt(unsigned int i)
866         {
867                 return V(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*::context), i, false));
868         }
869
870         Value *Nucleus::createConstantBool(bool b)
871         {
872                 return V(llvm::ConstantInt::get(llvm::Type::getInt1Ty(*::context), b));
873         }
874
875         Value *Nucleus::createConstantByte(signed char i)
876         {
877                 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, true));
878         }
879
880         Value *Nucleus::createConstantByte(unsigned char i)
881         {
882                 return V(llvm::ConstantInt::get(llvm::Type::getInt8Ty(*::context), i, false));
883         }
884
885         Value *Nucleus::createConstantShort(short i)
886         {
887                 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, true));
888         }
889
890         Value *Nucleus::createConstantShort(unsigned short i)
891         {
892                 return V(llvm::ConstantInt::get(llvm::Type::getInt16Ty(*::context), i, false));
893         }
894
895         Value *Nucleus::createConstantFloat(float x)
896         {
897                 return V(llvm::ConstantFP::get(T(Float::getType()), x));
898         }
899
900         Value *Nucleus::createNullPointer(Type *Ty)
901         {
902                 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(T(Ty), 0)));
903         }
904
905         Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
906         {
907                 assert(llvm::isa<llvm::VectorType>(T(type)));
908                 const int numConstants = llvm::cast<llvm::VectorType>(T(type))->getNumElements();
909                 assert(numConstants <= 16);
910                 llvm::Constant *constantVector[16];
911
912                 for(int i = 0; i < numConstants; i++)
913                 {
914                         constantVector[i] = llvm::ConstantInt::get(T(type)->getContainedType(0), constants[i]);
915                 }
916
917                 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
918         }
919
920         Value *Nucleus::createConstantVector(const double *constants, Type *type)
921         {
922                 assert(llvm::isa<llvm::VectorType>(T(type)));
923                 const int numConstants = llvm::cast<llvm::VectorType>(T(type))->getNumElements();
924                 assert(numConstants <= 8);
925                 llvm::Constant *constantVector[8];
926
927                 for(int i = 0; i < numConstants; i++)
928                 {
929                         constantVector[i] = llvm::ConstantFP::get(T(type)->getContainedType(0), constants[i]);
930                 }
931
932                 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
933         }
934
935         Type *Void::getType()
936         {
937                 return T(llvm::Type::getVoidTy(*::context));
938         }
939
940         Bool::Bool(Argument<Bool> argument)
941         {
942                 storeValue(argument.value);
943         }
944
945         Bool::Bool(bool x)
946         {
947                 storeValue(Nucleus::createConstantBool(x));
948         }
949
950         Bool::Bool(RValue<Bool> rhs)
951         {
952                 storeValue(rhs.value);
953         }
954
955         Bool::Bool(const Bool &rhs)
956         {
957                 Value *value = rhs.loadValue();
958                 storeValue(value);
959         }
960
961         Bool::Bool(const Reference<Bool> &rhs)
962         {
963                 Value *value = rhs.loadValue();
964                 storeValue(value);
965         }
966
967         RValue<Bool> Bool::operator=(RValue<Bool> rhs)
968         {
969                 storeValue(rhs.value);
970
971                 return rhs;
972         }
973
974         RValue<Bool> Bool::operator=(const Bool &rhs)
975         {
976                 Value *value = rhs.loadValue();
977                 storeValue(value);
978
979                 return RValue<Bool>(value);
980         }
981
982         RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
983         {
984                 Value *value = rhs.loadValue();
985                 storeValue(value);
986
987                 return RValue<Bool>(value);
988         }
989
990         RValue<Bool> operator!(RValue<Bool> val)
991         {
992                 return RValue<Bool>(Nucleus::createNot(val.value));
993         }
994
995         RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
996         {
997                 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
998         }
999
1000         RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
1001         {
1002                 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
1003         }
1004
1005         Type *Bool::getType()
1006         {
1007                 return T(llvm::Type::getInt1Ty(*::context));
1008         }
1009
1010         Byte::Byte(Argument<Byte> argument)
1011         {
1012                 storeValue(argument.value);
1013         }
1014
1015         Byte::Byte(RValue<Int> cast)
1016         {
1017                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1018
1019                 storeValue(integer);
1020         }
1021
1022         Byte::Byte(RValue<UInt> cast)
1023         {
1024                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1025
1026                 storeValue(integer);
1027         }
1028
1029         Byte::Byte(RValue<UShort> cast)
1030         {
1031                 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
1032
1033                 storeValue(integer);
1034         }
1035
1036         Byte::Byte(int x)
1037         {
1038                 storeValue(Nucleus::createConstantByte((unsigned char)x));
1039         }
1040
1041         Byte::Byte(unsigned char x)
1042         {
1043                 storeValue(Nucleus::createConstantByte(x));
1044         }
1045
1046         Byte::Byte(RValue<Byte> rhs)
1047         {
1048                 storeValue(rhs.value);
1049         }
1050
1051         Byte::Byte(const Byte &rhs)
1052         {
1053                 Value *value = rhs.loadValue();
1054                 storeValue(value);
1055         }
1056
1057         Byte::Byte(const Reference<Byte> &rhs)
1058         {
1059                 Value *value = rhs.loadValue();
1060                 storeValue(value);
1061         }
1062
1063         RValue<Byte> Byte::operator=(RValue<Byte> rhs)
1064         {
1065                 storeValue(rhs.value);
1066
1067                 return rhs;
1068         }
1069
1070         RValue<Byte> Byte::operator=(const Byte &rhs)
1071         {
1072                 Value *value = rhs.loadValue();
1073                 storeValue(value);
1074
1075                 return RValue<Byte>(value);
1076         }
1077
1078         RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
1079         {
1080                 Value *value = rhs.loadValue();
1081                 storeValue(value);
1082
1083                 return RValue<Byte>(value);
1084         }
1085
1086         RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
1087         {
1088                 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
1089         }
1090
1091         RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
1092         {
1093                 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1094         }
1095
1096         RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1097         {
1098                 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1099         }
1100
1101         RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1102         {
1103                 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1104         }
1105
1106         RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1107         {
1108                 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1109         }
1110
1111         RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1112         {
1113                 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1114         }
1115
1116         RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1117         {
1118                 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1119         }
1120
1121         RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1122         {
1123                 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1124         }
1125
1126         RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1127         {
1128                 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1129         }
1130
1131         RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1132         {
1133                 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1134         }
1135
1136         RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1137         {
1138                 return lhs = lhs + rhs;
1139         }
1140
1141         RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1142         {
1143                 return lhs = lhs - rhs;
1144         }
1145
1146         RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1147         {
1148                 return lhs = lhs * rhs;
1149         }
1150
1151         RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1152         {
1153                 return lhs = lhs / rhs;
1154         }
1155
1156         RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1157         {
1158                 return lhs = lhs % rhs;
1159         }
1160
1161         RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1162         {
1163                 return lhs = lhs & rhs;
1164         }
1165
1166         RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1167         {
1168                 return lhs = lhs | rhs;
1169         }
1170
1171         RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1172         {
1173                 return lhs = lhs ^ rhs;
1174         }
1175
1176         RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1177         {
1178                 return lhs = lhs << rhs;
1179         }
1180
1181         RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1182         {
1183                 return lhs = lhs >> rhs;
1184         }
1185
1186         RValue<Byte> operator+(RValue<Byte> val)
1187         {
1188                 return val;
1189         }
1190
1191         RValue<Byte> operator-(RValue<Byte> val)
1192         {
1193                 return RValue<Byte>(Nucleus::createNeg(val.value));
1194         }
1195
1196         RValue<Byte> operator~(RValue<Byte> val)
1197         {
1198                 return RValue<Byte>(Nucleus::createNot(val.value));
1199         }
1200
1201         RValue<Byte> operator++(Byte &val, int)   // Post-increment
1202         {
1203                 RValue<Byte> res = val;
1204
1205                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1206                 val.storeValue(inc);
1207
1208                 return res;
1209         }
1210
1211         const Byte &operator++(Byte &val)   // Pre-increment
1212         {
1213                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1214                 val.storeValue(inc);
1215
1216                 return val;
1217         }
1218
1219         RValue<Byte> operator--(Byte &val, int)   // Post-decrement
1220         {
1221                 RValue<Byte> res = val;
1222
1223                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1224                 val.storeValue(inc);
1225
1226                 return res;
1227         }
1228
1229         const Byte &operator--(Byte &val)   // Pre-decrement
1230         {
1231                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1232                 val.storeValue(inc);
1233
1234                 return val;
1235         }
1236
1237         RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1238         {
1239                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1240         }
1241
1242         RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1243         {
1244                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1245         }
1246
1247         RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1248         {
1249                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1250         }
1251
1252         RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1253         {
1254                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1255         }
1256
1257         RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1258         {
1259                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1260         }
1261
1262         RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1263         {
1264                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1265         }
1266
1267         Type *Byte::getType()
1268         {
1269                 return T(llvm::Type::getInt8Ty(*::context));
1270         }
1271
1272         SByte::SByte(Argument<SByte> argument)
1273         {
1274                 storeValue(argument.value);
1275         }
1276
1277         SByte::SByte(RValue<Int> cast)
1278         {
1279                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1280
1281                 storeValue(integer);
1282         }
1283
1284         SByte::SByte(RValue<Short> cast)
1285         {
1286                 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1287
1288                 storeValue(integer);
1289         }
1290
1291         SByte::SByte(signed char x)
1292         {
1293                 storeValue(Nucleus::createConstantByte(x));
1294         }
1295
1296         SByte::SByte(RValue<SByte> rhs)
1297         {
1298                 storeValue(rhs.value);
1299         }
1300
1301         SByte::SByte(const SByte &rhs)
1302         {
1303                 Value *value = rhs.loadValue();
1304                 storeValue(value);
1305         }
1306
1307         SByte::SByte(const Reference<SByte> &rhs)
1308         {
1309                 Value *value = rhs.loadValue();
1310                 storeValue(value);
1311         }
1312
1313         RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1314         {
1315                 storeValue(rhs.value);
1316
1317                 return rhs;
1318         }
1319
1320         RValue<SByte> SByte::operator=(const SByte &rhs)
1321         {
1322                 Value *value = rhs.loadValue();
1323                 storeValue(value);
1324
1325                 return RValue<SByte>(value);
1326         }
1327
1328         RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1329         {
1330                 Value *value = rhs.loadValue();
1331                 storeValue(value);
1332
1333                 return RValue<SByte>(value);
1334         }
1335
1336         RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1337         {
1338                 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1339         }
1340
1341         RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1342         {
1343                 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1344         }
1345
1346         RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1347         {
1348                 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1349         }
1350
1351         RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1352         {
1353                 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1354         }
1355
1356         RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1357         {
1358                 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1359         }
1360
1361         RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1362         {
1363                 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1364         }
1365
1366         RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1367         {
1368                 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1369         }
1370
1371         RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1372         {
1373                 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1374         }
1375
1376         RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1377         {
1378                 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1379         }
1380
1381         RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1382         {
1383                 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1384         }
1385
1386         RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1387         {
1388                 return lhs = lhs + rhs;
1389         }
1390
1391         RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1392         {
1393                 return lhs = lhs - rhs;
1394         }
1395
1396         RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1397         {
1398                 return lhs = lhs * rhs;
1399         }
1400
1401         RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1402         {
1403                 return lhs = lhs / rhs;
1404         }
1405
1406         RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1407         {
1408                 return lhs = lhs % rhs;
1409         }
1410
1411         RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1412         {
1413                 return lhs = lhs & rhs;
1414         }
1415
1416         RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1417         {
1418                 return lhs = lhs | rhs;
1419         }
1420
1421         RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1422         {
1423                 return lhs = lhs ^ rhs;
1424         }
1425
1426         RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1427         {
1428                 return lhs = lhs << rhs;
1429         }
1430
1431         RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1432         {
1433                 return lhs = lhs >> rhs;
1434         }
1435
1436         RValue<SByte> operator+(RValue<SByte> val)
1437         {
1438                 return val;
1439         }
1440
1441         RValue<SByte> operator-(RValue<SByte> val)
1442         {
1443                 return RValue<SByte>(Nucleus::createNeg(val.value));
1444         }
1445
1446         RValue<SByte> operator~(RValue<SByte> val)
1447         {
1448                 return RValue<SByte>(Nucleus::createNot(val.value));
1449         }
1450
1451         RValue<SByte> operator++(SByte &val, int)   // Post-increment
1452         {
1453                 RValue<SByte> res = val;
1454
1455                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1456                 val.storeValue(inc);
1457
1458                 return res;
1459         }
1460
1461         const SByte &operator++(SByte &val)   // Pre-increment
1462         {
1463                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1464                 val.storeValue(inc);
1465
1466                 return val;
1467         }
1468
1469         RValue<SByte> operator--(SByte &val, int)   // Post-decrement
1470         {
1471                 RValue<SByte> res = val;
1472
1473                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1474                 val.storeValue(inc);
1475
1476                 return res;
1477         }
1478
1479         const SByte &operator--(SByte &val)   // Pre-decrement
1480         {
1481                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1482                 val.storeValue(inc);
1483
1484                 return val;
1485         }
1486
1487         RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1488         {
1489                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1490         }
1491
1492         RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1493         {
1494                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1495         }
1496
1497         RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1498         {
1499                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1500         }
1501
1502         RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1503         {
1504                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1505         }
1506
1507         RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1508         {
1509                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1510         }
1511
1512         RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1513         {
1514                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1515         }
1516
1517         Type *SByte::getType()
1518         {
1519                 return T(llvm::Type::getInt8Ty(*::context));
1520         }
1521
1522         Short::Short(Argument<Short> argument)
1523         {
1524                 storeValue(argument.value);
1525         }
1526
1527         Short::Short(RValue<Int> cast)
1528         {
1529                 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1530
1531                 storeValue(integer);
1532         }
1533
1534         Short::Short(short x)
1535         {
1536                 storeValue(Nucleus::createConstantShort(x));
1537         }
1538
1539         Short::Short(RValue<Short> rhs)
1540         {
1541                 storeValue(rhs.value);
1542         }
1543
1544         Short::Short(const Short &rhs)
1545         {
1546                 Value *value = rhs.loadValue();
1547                 storeValue(value);
1548         }
1549
1550         Short::Short(const Reference<Short> &rhs)
1551         {
1552                 Value *value = rhs.loadValue();
1553                 storeValue(value);
1554         }
1555
1556         RValue<Short> Short::operator=(RValue<Short> rhs)
1557         {
1558                 storeValue(rhs.value);
1559
1560                 return rhs;
1561         }
1562
1563         RValue<Short> Short::operator=(const Short &rhs)
1564         {
1565                 Value *value = rhs.loadValue();
1566                 storeValue(value);
1567
1568                 return RValue<Short>(value);
1569         }
1570
1571         RValue<Short> Short::operator=(const Reference<Short> &rhs)
1572         {
1573                 Value *value = rhs.loadValue();
1574                 storeValue(value);
1575
1576                 return RValue<Short>(value);
1577         }
1578
1579         RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1580         {
1581                 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1582         }
1583
1584         RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1585         {
1586                 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1587         }
1588
1589         RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1590         {
1591                 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1592         }
1593
1594         RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1595         {
1596                 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1597         }
1598
1599         RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1600         {
1601                 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1602         }
1603
1604         RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1605         {
1606                 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1607         }
1608
1609         RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1610         {
1611                 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1612         }
1613
1614         RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1615         {
1616                 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1617         }
1618
1619         RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1620         {
1621                 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1622         }
1623
1624         RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1625         {
1626                 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1627         }
1628
1629         RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1630         {
1631                 return lhs = lhs + rhs;
1632         }
1633
1634         RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
1635         {
1636                 return lhs = lhs - rhs;
1637         }
1638
1639         RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
1640         {
1641                 return lhs = lhs * rhs;
1642         }
1643
1644         RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
1645         {
1646                 return lhs = lhs / rhs;
1647         }
1648
1649         RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
1650         {
1651                 return lhs = lhs % rhs;
1652         }
1653
1654         RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
1655         {
1656                 return lhs = lhs & rhs;
1657         }
1658
1659         RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
1660         {
1661                 return lhs = lhs | rhs;
1662         }
1663
1664         RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
1665         {
1666                 return lhs = lhs ^ rhs;
1667         }
1668
1669         RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
1670         {
1671                 return lhs = lhs << rhs;
1672         }
1673
1674         RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
1675         {
1676                 return lhs = lhs >> rhs;
1677         }
1678
1679         RValue<Short> operator+(RValue<Short> val)
1680         {
1681                 return val;
1682         }
1683
1684         RValue<Short> operator-(RValue<Short> val)
1685         {
1686                 return RValue<Short>(Nucleus::createNeg(val.value));
1687         }
1688
1689         RValue<Short> operator~(RValue<Short> val)
1690         {
1691                 return RValue<Short>(Nucleus::createNot(val.value));
1692         }
1693
1694         RValue<Short> operator++(Short &val, int)   // Post-increment
1695         {
1696                 RValue<Short> res = val;
1697
1698                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1699                 val.storeValue(inc);
1700
1701                 return res;
1702         }
1703
1704         const Short &operator++(Short &val)   // Pre-increment
1705         {
1706                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1707                 val.storeValue(inc);
1708
1709                 return val;
1710         }
1711
1712         RValue<Short> operator--(Short &val, int)   // Post-decrement
1713         {
1714                 RValue<Short> res = val;
1715
1716                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1717                 val.storeValue(inc);
1718
1719                 return res;
1720         }
1721
1722         const Short &operator--(Short &val)   // Pre-decrement
1723         {
1724                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1725                 val.storeValue(inc);
1726
1727                 return val;
1728         }
1729
1730         RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1731         {
1732                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1733         }
1734
1735         RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1736         {
1737                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1738         }
1739
1740         RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1741         {
1742                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1743         }
1744
1745         RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1746         {
1747                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1748         }
1749
1750         RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1751         {
1752                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1753         }
1754
1755         RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1756         {
1757                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1758         }
1759
1760         Type *Short::getType()
1761         {
1762                 return T(llvm::Type::getInt16Ty(*::context));
1763         }
1764
1765         UShort::UShort(Argument<UShort> argument)
1766         {
1767                 storeValue(argument.value);
1768         }
1769
1770         UShort::UShort(RValue<UInt> cast)
1771         {
1772                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1773
1774                 storeValue(integer);
1775         }
1776
1777         UShort::UShort(RValue<Int> cast)
1778         {
1779                 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1780
1781                 storeValue(integer);
1782         }
1783
1784         UShort::UShort(unsigned short x)
1785         {
1786                 storeValue(Nucleus::createConstantShort(x));
1787         }
1788
1789         UShort::UShort(RValue<UShort> rhs)
1790         {
1791                 storeValue(rhs.value);
1792         }
1793
1794         UShort::UShort(const UShort &rhs)
1795         {
1796                 Value *value = rhs.loadValue();
1797                 storeValue(value);
1798         }
1799
1800         UShort::UShort(const Reference<UShort> &rhs)
1801         {
1802                 Value *value = rhs.loadValue();
1803                 storeValue(value);
1804         }
1805
1806         RValue<UShort> UShort::operator=(RValue<UShort> rhs)
1807         {
1808                 storeValue(rhs.value);
1809
1810                 return rhs;
1811         }
1812
1813         RValue<UShort> UShort::operator=(const UShort &rhs)
1814         {
1815                 Value *value = rhs.loadValue();
1816                 storeValue(value);
1817
1818                 return RValue<UShort>(value);
1819         }
1820
1821         RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
1822         {
1823                 Value *value = rhs.loadValue();
1824                 storeValue(value);
1825
1826                 return RValue<UShort>(value);
1827         }
1828
1829         RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1830         {
1831                 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1832         }
1833
1834         RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1835         {
1836                 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1837         }
1838
1839         RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1840         {
1841                 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1842         }
1843
1844         RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1845         {
1846                 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1847         }
1848
1849         RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1850         {
1851                 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1852         }
1853
1854         RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1855         {
1856                 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1857         }
1858
1859         RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1860         {
1861                 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1862         }
1863
1864         RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1865         {
1866                 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1867         }
1868
1869         RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1870         {
1871                 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1872         }
1873
1874         RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1875         {
1876                 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1877         }
1878
1879         RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
1880         {
1881                 return lhs = lhs + rhs;
1882         }
1883
1884         RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
1885         {
1886                 return lhs = lhs - rhs;
1887         }
1888
1889         RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
1890         {
1891                 return lhs = lhs * rhs;
1892         }
1893
1894         RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
1895         {
1896                 return lhs = lhs / rhs;
1897         }
1898
1899         RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
1900         {
1901                 return lhs = lhs % rhs;
1902         }
1903
1904         RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
1905         {
1906                 return lhs = lhs & rhs;
1907         }
1908
1909         RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
1910         {
1911                 return lhs = lhs | rhs;
1912         }
1913
1914         RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
1915         {
1916                 return lhs = lhs ^ rhs;
1917         }
1918
1919         RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
1920         {
1921                 return lhs = lhs << rhs;
1922         }
1923
1924         RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
1925         {
1926                 return lhs = lhs >> rhs;
1927         }
1928
1929         RValue<UShort> operator+(RValue<UShort> val)
1930         {
1931                 return val;
1932         }
1933
1934         RValue<UShort> operator-(RValue<UShort> val)
1935         {
1936                 return RValue<UShort>(Nucleus::createNeg(val.value));
1937         }
1938
1939         RValue<UShort> operator~(RValue<UShort> val)
1940         {
1941                 return RValue<UShort>(Nucleus::createNot(val.value));
1942         }
1943
1944         RValue<UShort> operator++(UShort &val, int)   // Post-increment
1945         {
1946                 RValue<UShort> res = val;
1947
1948                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1949                 val.storeValue(inc);
1950
1951                 return res;
1952         }
1953
1954         const UShort &operator++(UShort &val)   // Pre-increment
1955         {
1956                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1957                 val.storeValue(inc);
1958
1959                 return val;
1960         }
1961
1962         RValue<UShort> operator--(UShort &val, int)   // Post-decrement
1963         {
1964                 RValue<UShort> res = val;
1965
1966                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1967                 val.storeValue(inc);
1968
1969                 return res;
1970         }
1971
1972         const UShort &operator--(UShort &val)   // Pre-decrement
1973         {
1974                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1975                 val.storeValue(inc);
1976
1977                 return val;
1978         }
1979
1980         RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1981         {
1982                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1983         }
1984
1985         RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1986         {
1987                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1988         }
1989
1990         RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1991         {
1992                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1993         }
1994
1995         RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1996         {
1997                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1998         }
1999
2000         RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
2001         {
2002                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
2003         }
2004
2005         RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
2006         {
2007                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
2008         }
2009
2010         Type *UShort::getType()
2011         {
2012                 return T(llvm::Type::getInt16Ty(*::context));
2013         }
2014
2015         Byte4::Byte4(RValue<Byte8> cast)
2016         {
2017                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2018         }
2019
2020         Byte4::Byte4(const Reference<Byte4> &rhs)
2021         {
2022                 Value *value = rhs.loadValue();
2023                 storeValue(value);
2024         }
2025
2026         Type *Byte4::getType()
2027         {
2028                 return T(Type_v4i8);
2029         }
2030
2031         Type *SByte4::getType()
2032         {
2033                 return T(Type_v4i8);
2034         }
2035
2036         Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2037         {
2038                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2039                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2040         }
2041
2042         Byte8::Byte8(RValue<Byte8> rhs)
2043         {
2044                 storeValue(rhs.value);
2045         }
2046
2047         Byte8::Byte8(const Byte8 &rhs)
2048         {
2049                 Value *value = rhs.loadValue();
2050                 storeValue(value);
2051         }
2052
2053         Byte8::Byte8(const Reference<Byte8> &rhs)
2054         {
2055                 Value *value = rhs.loadValue();
2056                 storeValue(value);
2057         }
2058
2059         RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
2060         {
2061                 storeValue(rhs.value);
2062
2063                 return rhs;
2064         }
2065
2066         RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
2067         {
2068                 Value *value = rhs.loadValue();
2069                 storeValue(value);
2070
2071                 return RValue<Byte8>(value);
2072         }
2073
2074         RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
2075         {
2076                 Value *value = rhs.loadValue();
2077                 storeValue(value);
2078
2079                 return RValue<Byte8>(value);
2080         }
2081
2082         RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2083         {
2084                 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2085         }
2086
2087         RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2088         {
2089                 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2090         }
2091
2092 //      RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2093 //      {
2094 //              return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2095 //      }
2096
2097 //      RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2098 //      {
2099 //              return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2100 //      }
2101
2102 //      RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2103 //      {
2104 //              return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2105 //      }
2106
2107         RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2108         {
2109                 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2110         }
2111
2112         RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2113         {
2114                 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2115         }
2116
2117         RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2118         {
2119                 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2120         }
2121
2122 //      RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2123 //      {
2124 //              return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2125 //      }
2126
2127 //      RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2128 //      {
2129 //              return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2130 //      }
2131
2132         RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2133         {
2134                 return lhs = lhs + rhs;
2135         }
2136
2137         RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2138         {
2139                 return lhs = lhs - rhs;
2140         }
2141
2142 //      RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2143 //      {
2144 //              return lhs = lhs * rhs;
2145 //      }
2146
2147 //      RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2148 //      {
2149 //              return lhs = lhs / rhs;
2150 //      }
2151
2152 //      RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2153 //      {
2154 //              return lhs = lhs % rhs;
2155 //      }
2156
2157         RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2158         {
2159                 return lhs = lhs & rhs;
2160         }
2161
2162         RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2163         {
2164                 return lhs = lhs | rhs;
2165         }
2166
2167         RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2168         {
2169                 return lhs = lhs ^ rhs;
2170         }
2171
2172 //      RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2173 //      {
2174 //              return lhs = lhs << rhs;
2175 //      }
2176
2177 //      RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2178 //      {
2179 //              return lhs = lhs >> rhs;
2180 //      }
2181
2182 //      RValue<Byte8> operator+(RValue<Byte8> val)
2183 //      {
2184 //              return val;
2185 //      }
2186
2187 //      RValue<Byte8> operator-(RValue<Byte8> val)
2188 //      {
2189 //              return RValue<Byte8>(Nucleus::createNeg(val.value));
2190 //      }
2191
2192         RValue<Byte8> operator~(RValue<Byte8> val)
2193         {
2194                 return RValue<Byte8>(Nucleus::createNot(val.value));
2195         }
2196
2197         RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2198         {
2199                 return x86::paddusb(x, y);
2200         }
2201
2202         RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2203         {
2204                 return x86::psubusb(x, y);
2205         }
2206
2207         RValue<Short4> Unpack(RValue<Byte4> x)
2208         {
2209                 int shuffle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};   // Real type is v16i8
2210                 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2211         }
2212
2213         RValue<Short4> Unpack(RValue<Byte4> x, RValue<Byte4> y)
2214         {
2215                 return UnpackLow(As<Byte8>(x), As<Byte8>(y));
2216         }
2217
2218         RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2219         {
2220                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2221                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2222         }
2223
2224         RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2225         {
2226                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2227                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2228                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2229         }
2230
2231         RValue<Int> SignMask(RValue<Byte8> x)
2232         {
2233                 return x86::pmovmskb(x);
2234         }
2235
2236 //      RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2237 //      {
2238 //              return x86::pcmpgtb(x, y);   // FIXME: Signedness
2239 //      }
2240
2241         RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2242         {
2243                 return x86::pcmpeqb(x, y);
2244         }
2245
2246         Type *Byte8::getType()
2247         {
2248                 return T(Type_v8i8);
2249         }
2250
2251         SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2252         {
2253                 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2254                 Value *vector = V(Nucleus::createConstantVector(constantVector, getType()));
2255
2256                 storeValue(Nucleus::createBitCast(vector, getType()));
2257         }
2258
2259         SByte8::SByte8(RValue<SByte8> rhs)
2260         {
2261                 storeValue(rhs.value);
2262         }
2263
2264         SByte8::SByte8(const SByte8 &rhs)
2265         {
2266                 Value *value = rhs.loadValue();
2267                 storeValue(value);
2268         }
2269
2270         SByte8::SByte8(const Reference<SByte8> &rhs)
2271         {
2272                 Value *value = rhs.loadValue();
2273                 storeValue(value);
2274         }
2275
2276         RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2277         {
2278                 storeValue(rhs.value);
2279
2280                 return rhs;
2281         }
2282
2283         RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2284         {
2285                 Value *value = rhs.loadValue();
2286                 storeValue(value);
2287
2288                 return RValue<SByte8>(value);
2289         }
2290
2291         RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2292         {
2293                 Value *value = rhs.loadValue();
2294                 storeValue(value);
2295
2296                 return RValue<SByte8>(value);
2297         }
2298
2299         RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2300         {
2301                 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2302         }
2303
2304         RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2305         {
2306                 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2307         }
2308
2309 //      RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2310 //      {
2311 //              return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2312 //      }
2313
2314 //      RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2315 //      {
2316 //              return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2317 //      }
2318
2319 //      RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2320 //      {
2321 //              return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2322 //      }
2323
2324         RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2325         {
2326                 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2327         }
2328
2329         RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2330         {
2331                 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2332         }
2333
2334         RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2335         {
2336                 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2337         }
2338
2339 //      RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2340 //      {
2341 //              return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2342 //      }
2343
2344 //      RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2345 //      {
2346 //              return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2347 //      }
2348
2349         RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2350         {
2351                 return lhs = lhs + rhs;
2352         }
2353
2354         RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2355         {
2356                 return lhs = lhs - rhs;
2357         }
2358
2359 //      RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2360 //      {
2361 //              return lhs = lhs * rhs;
2362 //      }
2363
2364 //      RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2365 //      {
2366 //              return lhs = lhs / rhs;
2367 //      }
2368
2369 //      RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2370 //      {
2371 //              return lhs = lhs % rhs;
2372 //      }
2373
2374         RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2375         {
2376                 return lhs = lhs & rhs;
2377         }
2378
2379         RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2380         {
2381                 return lhs = lhs | rhs;
2382         }
2383
2384         RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2385         {
2386                 return lhs = lhs ^ rhs;
2387         }
2388
2389 //      RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2390 //      {
2391 //              return lhs = lhs << rhs;
2392 //      }
2393
2394 //      RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2395 //      {
2396 //              return lhs = lhs >> rhs;
2397 //      }
2398
2399 //      RValue<SByte8> operator+(RValue<SByte8> val)
2400 //      {
2401 //              return val;
2402 //      }
2403
2404 //      RValue<SByte8> operator-(RValue<SByte8> val)
2405 //      {
2406 //              return RValue<SByte8>(Nucleus::createNeg(val.value));
2407 //      }
2408
2409         RValue<SByte8> operator~(RValue<SByte8> val)
2410         {
2411                 return RValue<SByte8>(Nucleus::createNot(val.value));
2412         }
2413
2414         RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2415         {
2416                 return x86::paddsb(x, y);
2417         }
2418
2419         RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2420         {
2421                 return x86::psubsb(x, y);
2422         }
2423
2424         RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2425         {
2426                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2427                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2428         }
2429
2430         RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2431         {
2432                 int shuffle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};   // Real type is v16i8
2433                 auto lowHigh = RValue<Byte16>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2434                 return As<Short4>(Swizzle(As<Int4>(lowHigh), 0xEE));
2435         }
2436
2437         RValue<Int> SignMask(RValue<SByte8> x)
2438         {
2439                 return x86::pmovmskb(As<Byte8>(x));
2440         }
2441
2442         RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2443         {
2444                 return x86::pcmpgtb(x, y);
2445         }
2446
2447         RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2448         {
2449                 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2450         }
2451
2452         Type *SByte8::getType()
2453         {
2454                 return T(Type_v8i8);
2455         }
2456
2457         Byte16::Byte16(RValue<Byte16> rhs)
2458         {
2459                 storeValue(rhs.value);
2460         }
2461
2462         Byte16::Byte16(const Byte16 &rhs)
2463         {
2464                 Value *value = rhs.loadValue();
2465                 storeValue(value);
2466         }
2467
2468         Byte16::Byte16(const Reference<Byte16> &rhs)
2469         {
2470                 Value *value = rhs.loadValue();
2471                 storeValue(value);
2472         }
2473
2474         RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2475         {
2476                 storeValue(rhs.value);
2477
2478                 return rhs;
2479         }
2480
2481         RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2482         {
2483                 Value *value = rhs.loadValue();
2484                 storeValue(value);
2485
2486                 return RValue<Byte16>(value);
2487         }
2488
2489         RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2490         {
2491                 Value *value = rhs.loadValue();
2492                 storeValue(value);
2493
2494                 return RValue<Byte16>(value);
2495         }
2496
2497         Type *Byte16::getType()
2498         {
2499                 return T(llvm::VectorType::get(T(Byte::getType()), 16));
2500         }
2501
2502         Type *SByte16::getType()
2503         {
2504                 return T(llvm::VectorType::get(T(SByte::getType()), 16));
2505         }
2506
2507         Short2::Short2(RValue<Short4> cast)
2508         {
2509                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2510         }
2511
2512         Type *Short2::getType()
2513         {
2514                 return T(Type_v2i16);
2515         }
2516
2517         UShort2::UShort2(RValue<UShort4> cast)
2518         {
2519                 storeValue(Nucleus::createBitCast(cast.value, getType()));
2520         }
2521
2522         Type *UShort2::getType()
2523         {
2524                 return T(Type_v2i16);
2525         }
2526
2527         Short4::Short4(RValue<Int> cast)
2528         {
2529                 Value *vector = loadValue();
2530                 Value *element = Nucleus::createTrunc(cast.value, Short::getType());
2531                 Value *insert = Nucleus::createInsertElement(vector, element, 0);
2532                 Value *swizzle = Swizzle(RValue<Short4>(insert), 0x00).value;
2533
2534                 storeValue(swizzle);
2535         }
2536
2537         Short4::Short4(RValue<Int4> cast)
2538         {
2539                 int select[8] = {0, 2, 4, 6, 0, 2, 4, 6};
2540                 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2541
2542                 Value *packed = Nucleus::createShuffleVector(short8, short8, select);
2543                 Value *short4 = As<Short4>(Int2(As<Int4>(packed))).value;
2544
2545                 storeValue(short4);
2546         }
2547
2548 //      Short4::Short4(RValue<Float> cast)
2549 //      {
2550 //      }
2551
2552         Short4::Short4(RValue<Float4> cast)
2553         {
2554                 Int4 v4i32 = Int4(cast);
2555                 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2556
2557                 storeValue(As<Short4>(Int2(v4i32)).value);
2558         }
2559
2560         Short4::Short4(short xyzw)
2561         {
2562                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2563                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2564         }
2565
2566         Short4::Short4(short x, short y, short z, short w)
2567         {
2568                 int64_t constantVector[4] = {x, y, z, w};
2569                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2570         }
2571
2572         Short4::Short4(RValue<Short4> rhs)
2573         {
2574                 storeValue(rhs.value);
2575         }
2576
2577         Short4::Short4(const Short4 &rhs)
2578         {
2579                 Value *value = rhs.loadValue();
2580                 storeValue(value);
2581         }
2582
2583         Short4::Short4(const Reference<Short4> &rhs)
2584         {
2585                 Value *value = rhs.loadValue();
2586                 storeValue(value);
2587         }
2588
2589         Short4::Short4(RValue<UShort4> rhs)
2590         {
2591                 storeValue(rhs.value);
2592         }
2593
2594         Short4::Short4(const UShort4 &rhs)
2595         {
2596                 storeValue(rhs.loadValue());
2597         }
2598
2599         Short4::Short4(const Reference<UShort4> &rhs)
2600         {
2601                 storeValue(rhs.loadValue());
2602         }
2603
2604         RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2605         {
2606                 storeValue(rhs.value);
2607
2608                 return rhs;
2609         }
2610
2611         RValue<Short4> Short4::operator=(const Short4 &rhs)
2612         {
2613                 Value *value = rhs.loadValue();
2614                 storeValue(value);
2615
2616                 return RValue<Short4>(value);
2617         }
2618
2619         RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
2620         {
2621                 Value *value = rhs.loadValue();
2622                 storeValue(value);
2623
2624                 return RValue<Short4>(value);
2625         }
2626
2627         RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
2628         {
2629                 storeValue(rhs.value);
2630
2631                 return RValue<Short4>(rhs);
2632         }
2633
2634         RValue<Short4> Short4::operator=(const UShort4 &rhs)
2635         {
2636                 Value *value = rhs.loadValue();
2637                 storeValue(value);
2638
2639                 return RValue<Short4>(value);
2640         }
2641
2642         RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
2643         {
2644                 Value *value = rhs.loadValue();
2645                 storeValue(value);
2646
2647                 return RValue<Short4>(value);
2648         }
2649
2650         RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2651         {
2652                 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2653         }
2654
2655         RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2656         {
2657                 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2658         }
2659
2660         RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2661         {
2662                 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2663         }
2664
2665 //      RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2666 //      {
2667 //              return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2668 //      }
2669
2670 //      RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2671 //      {
2672 //              return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2673 //      }
2674
2675         RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2676         {
2677                 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2678         }
2679
2680         RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2681         {
2682                 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2683         }
2684
2685         RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2686         {
2687                 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2688         }
2689
2690         RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2691         {
2692         //      return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2693
2694                 return x86::psllw(lhs, rhs);
2695         }
2696
2697         RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2698         {
2699         //      return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2700
2701                 return x86::psraw(lhs, rhs);
2702         }
2703
2704         RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
2705         {
2706                 return lhs = lhs + rhs;
2707         }
2708
2709         RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
2710         {
2711                 return lhs = lhs - rhs;
2712         }
2713
2714         RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
2715         {
2716                 return lhs = lhs * rhs;
2717         }
2718
2719 //      RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
2720 //      {
2721 //              return lhs = lhs / rhs;
2722 //      }
2723
2724 //      RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
2725 //      {
2726 //              return lhs = lhs % rhs;
2727 //      }
2728
2729         RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
2730         {
2731                 return lhs = lhs & rhs;
2732         }
2733
2734         RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
2735         {
2736                 return lhs = lhs | rhs;
2737         }
2738
2739         RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
2740         {
2741                 return lhs = lhs ^ rhs;
2742         }
2743
2744         RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
2745         {
2746                 return lhs = lhs << rhs;
2747         }
2748
2749         RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
2750         {
2751                 return lhs = lhs >> rhs;
2752         }
2753
2754 //      RValue<Short4> operator+(RValue<Short4> val)
2755 //      {
2756 //              return val;
2757 //      }
2758
2759         RValue<Short4> operator-(RValue<Short4> val)
2760         {
2761                 return RValue<Short4>(Nucleus::createNeg(val.value));
2762         }
2763
2764         RValue<Short4> operator~(RValue<Short4> val)
2765         {
2766                 return RValue<Short4>(Nucleus::createNot(val.value));
2767         }
2768
2769         RValue<Short4> RoundShort4(RValue<Float4> cast)
2770         {
2771                 RValue<Int4> int4 = RoundInt(cast);
2772                 return As<Short4>(Pack(int4, int4));
2773         }
2774
2775         RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2776         {
2777                 return x86::pmaxsw(x, y);
2778         }
2779
2780         RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2781         {
2782                 return x86::pminsw(x, y);
2783         }
2784
2785         RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2786         {
2787                 return x86::paddsw(x, y);
2788         }
2789
2790         RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2791         {
2792                 return x86::psubsw(x, y);
2793         }
2794
2795         RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2796         {
2797                 return x86::pmulhw(x, y);
2798         }
2799
2800         RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2801         {
2802                 return x86::pmaddwd(x, y);
2803         }
2804
2805         RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
2806         {
2807                 auto result = x86::packsswb(x, y);
2808
2809                 return As<SByte8>(Swizzle(As<Int4>(result), 0x88));
2810         }
2811
2812         RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
2813         {
2814                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
2815                 return As<Int2>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2816         }
2817
2818         RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
2819         {
2820                 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};   // Real type is v8i16
2821                 auto lowHigh = RValue<Short8>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
2822                 return As<Int2>(Swizzle(As<Int4>(lowHigh), 0xEE));
2823         }
2824
2825         RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
2826         {
2827                 // Real type is v8i16
2828                 int shuffle[8] =
2829                 {
2830                         (select >> 0) & 0x03,
2831                         (select >> 2) & 0x03,
2832                         (select >> 4) & 0x03,
2833                         (select >> 6) & 0x03,
2834                         (select >> 0) & 0x03,
2835                         (select >> 2) & 0x03,
2836                         (select >> 4) & 0x03,
2837                         (select >> 6) & 0x03,
2838                 };
2839
2840                 return As<Short4>(Nucleus::createShuffleVector(x.value, x.value, shuffle));
2841         }
2842
2843         RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
2844         {
2845                 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
2846         }
2847
2848         RValue<Short> Extract(RValue<Short4> val, int i)
2849         {
2850                 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2851         }
2852
2853         RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2854         {
2855                 return x86::pcmpgtw(x, y);
2856         }
2857
2858         RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2859         {
2860                 return x86::pcmpeqw(x, y);
2861         }
2862
2863         Type *Short4::getType()
2864         {
2865                 return T(Type_v4i16);
2866         }
2867
2868         UShort4::UShort4(RValue<Int4> cast)
2869         {
2870                 *this = Short4(cast);
2871         }
2872
2873         UShort4::UShort4(RValue<Float4> cast, bool saturate)
2874         {
2875                 if(saturate)
2876                 {
2877                         if(CPUID::supportsSSE4_1())
2878                         {
2879                                 Int4 int4(Min(cast, Float4(0xFFFF)));   // packusdw takes care of 0x0000 saturation
2880                                 *this = As<Short4>(Pack(As<UInt4>(int4), As<UInt4>(int4)));
2881                         }
2882                         else
2883                         {
2884                                 *this = Short4(Int4(Max(Min(cast, Float4(0xFFFF)), Float4(0x0000))));
2885                         }
2886                 }
2887                 else
2888                 {
2889                         *this = Short4(Int4(cast));
2890                 }
2891         }
2892
2893         UShort4::UShort4(unsigned short xyzw)
2894         {
2895                 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2896                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2897         }
2898
2899         UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
2900         {
2901                 int64_t constantVector[4] = {x, y, z, w};
2902                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
2903         }
2904
2905         UShort4::UShort4(RValue<UShort4> rhs)
2906         {
2907                 storeValue(rhs.value);
2908         }
2909
2910         UShort4::UShort4(const UShort4 &rhs)
2911         {
2912                 Value *value = rhs.loadValue();
2913                 storeValue(value);
2914         }
2915
2916         UShort4::UShort4(const Reference<UShort4> &rhs)
2917         {
2918                 Value *value = rhs.loadValue();
2919                 storeValue(value);
2920         }
2921
2922         UShort4::UShort4(RValue<Short4> rhs)
2923         {
2924                 storeValue(rhs.value);
2925         }
2926
2927         UShort4::UShort4(const Short4 &rhs)
2928         {
2929                 Value *value = rhs.loadValue();
2930                 storeValue(value);
2931         }
2932
2933         UShort4::UShort4(const Reference<Short4> &rhs)
2934         {
2935                 Value *value = rhs.loadValue();
2936                 storeValue(value);
2937         }
2938
2939         RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
2940         {
2941                 storeValue(rhs.value);
2942
2943                 return rhs;
2944         }
2945
2946         RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
2947         {
2948                 Value *value = rhs.loadValue();
2949                 storeValue(value);
2950
2951                 return RValue<UShort4>(value);
2952         }
2953
2954         RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
2955         {
2956                 Value *value = rhs.loadValue();
2957                 storeValue(value);
2958
2959                 return RValue<UShort4>(value);
2960         }
2961
2962         RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
2963         {
2964                 storeValue(rhs.value);
2965
2966                 return RValue<UShort4>(rhs);
2967         }
2968
2969         RValue<UShort4> UShort4::operator=(const Short4 &rhs)
2970         {
2971                 Value *value = rhs.loadValue();
2972                 storeValue(value);
2973
2974                 return RValue<UShort4>(value);
2975         }
2976
2977         RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
2978         {
2979                 Value *value = rhs.loadValue();
2980                 storeValue(value);
2981
2982                 return RValue<UShort4>(value);
2983         }
2984
2985         RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
2986         {
2987                 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
2988         }
2989
2990         RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
2991         {
2992                 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
2993         }
2994
2995         RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
2996         {
2997                 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
2998         }
2999
3000         RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3001         {
3002                 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3003         }
3004
3005         RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3006         {
3007                 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3008         }
3009
3010         RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3011         {
3012                 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3013         }
3014
3015         RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3016         {
3017         //      return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3018
3019                 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3020         }
3021
3022         RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3023         {
3024         //      return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3025
3026                 return x86::psrlw(lhs, rhs);
3027         }
3028
3029         RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3030         {
3031                 return lhs = lhs << rhs;
3032         }
3033
3034         RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3035         {
3036                 return lhs = lhs >> rhs;
3037         }
3038
3039         RValue<UShort4> operator~(RValue<UShort4> val)
3040         {
3041                 return RValue<UShort4>(Nucleus::createNot(val.value));
3042         }
3043
3044         RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3045         {
3046                 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3047         }
3048
3049         RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3050         {
3051                 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3052         }
3053
3054         RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3055         {
3056                 return x86::paddusw(x, y);
3057         }
3058
3059         RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3060         {
3061                 return x86::psubusw(x, y);
3062         }
3063
3064         RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3065         {
3066                 return x86::pmulhuw(x, y);
3067         }
3068
3069         RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3070         {
3071                 return x86::pavgw(x, y);
3072         }
3073
3074         RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3075         {
3076                 auto result = x86::packuswb(x, y);
3077
3078                 return As<Byte8>(Swizzle(As<Int4>(result), 0x88));
3079         }
3080
3081         Type *UShort4::getType()
3082         {
3083                 return T(Type_v4i16);
3084         }
3085
3086         Short8::Short8(short c)
3087         {
3088                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3089                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3090         }
3091
3092         Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3093         {
3094                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3095                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3096         }
3097
3098         Short8::Short8(RValue<Short8> rhs)
3099         {
3100                 storeValue(rhs.value);
3101         }
3102
3103         Short8::Short8(const Reference<Short8> &rhs)
3104         {
3105                 Value *value = rhs.loadValue();
3106                 storeValue(value);
3107         }
3108
3109         Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3110         {
3111                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3112                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3113
3114                 storeValue(packed);
3115         }
3116
3117         RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3118         {
3119                 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3120         }
3121
3122         RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3123         {
3124                 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3125         }
3126
3127         RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3128         {
3129                 return x86::psllw(lhs, rhs);   // FIXME: Fallback required
3130         }
3131
3132         RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3133         {
3134                 return x86::psraw(lhs, rhs);   // FIXME: Fallback required
3135         }
3136
3137         RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3138         {
3139                 return x86::pmaddwd(x, y);   // FIXME: Fallback required
3140         }
3141
3142         RValue<Int4> Abs(RValue<Int4> x)
3143         {
3144                 auto negative = x >> 31;
3145                 return (x ^ negative) - negative;
3146         }
3147
3148         RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3149         {
3150                 return x86::pmulhw(x, y);   // FIXME: Fallback required
3151         }
3152
3153         Type *Short8::getType()
3154         {
3155                 return T(llvm::VectorType::get(T(Short::getType()), 8));
3156         }
3157
3158         UShort8::UShort8(unsigned short c)
3159         {
3160                 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3161                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3162         }
3163
3164         UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3165         {
3166                 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3167                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3168         }
3169
3170         UShort8::UShort8(RValue<UShort8> rhs)
3171         {
3172                 storeValue(rhs.value);
3173         }
3174
3175         UShort8::UShort8(const Reference<UShort8> &rhs)
3176         {
3177                 Value *value = rhs.loadValue();
3178                 storeValue(value);
3179         }
3180
3181         UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3182         {
3183                 int shuffle[8] = {0, 1, 2, 3, 8, 9, 10, 11};   // Real type is v8i16
3184                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
3185
3186                 storeValue(packed);
3187         }
3188
3189         RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3190         {
3191                 storeValue(rhs.value);
3192
3193                 return rhs;
3194         }
3195
3196         RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3197         {
3198                 Value *value = rhs.loadValue();
3199                 storeValue(value);
3200
3201                 return RValue<UShort8>(value);
3202         }
3203
3204         RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3205         {
3206                 Value *value = rhs.loadValue();
3207                 storeValue(value);
3208
3209                 return RValue<UShort8>(value);
3210         }
3211
3212         RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3213         {
3214                 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3215         }
3216
3217         RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3218         {
3219                 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs));   // FIXME: Fallback required
3220         }
3221
3222         RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3223         {
3224                 return x86::psrlw(lhs, rhs);   // FIXME: Fallback required
3225         }
3226
3227         RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3228         {
3229                 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3230         }
3231
3232         RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3233         {
3234                 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3235         }
3236
3237         RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3238         {
3239                 return lhs = lhs + rhs;
3240         }
3241
3242         RValue<UShort8> operator~(RValue<UShort8> val)
3243         {
3244                 return RValue<UShort8>(Nucleus::createNot(val.value));
3245         }
3246
3247         RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3248         {
3249                 int pshufb[16] =
3250                 {
3251                         select0 + 0,
3252                         select0 + 1,
3253                         select1 + 0,
3254                         select1 + 1,
3255                         select2 + 0,
3256                         select2 + 1,
3257                         select3 + 0,
3258                         select3 + 1,
3259                         select4 + 0,
3260                         select4 + 1,
3261                         select5 + 0,
3262                         select5 + 1,
3263                         select6 + 0,
3264                         select6 + 1,
3265                         select7 + 0,
3266                         select7 + 1,
3267                 };
3268
3269                 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3270                 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
3271                 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3272
3273                 return RValue<UShort8>(short8);
3274         }
3275
3276         RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3277         {
3278                 return x86::pmulhuw(x, y);   // FIXME: Fallback required
3279         }
3280
3281         Type *UShort8::getType()
3282         {
3283                 return T(llvm::VectorType::get(T(UShort::getType()), 8));
3284         }
3285
3286         Int::Int(Argument<Int> argument)
3287         {
3288                 storeValue(argument.value);
3289         }
3290
3291         Int::Int(RValue<Byte> cast)
3292         {
3293                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3294
3295                 storeValue(integer);
3296         }
3297
3298         Int::Int(RValue<SByte> cast)
3299         {
3300                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3301
3302                 storeValue(integer);
3303         }
3304
3305         Int::Int(RValue<Short> cast)
3306         {
3307                 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3308
3309                 storeValue(integer);
3310         }
3311
3312         Int::Int(RValue<UShort> cast)
3313         {
3314                 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3315
3316                 storeValue(integer);
3317         }
3318
3319         Int::Int(RValue<Int2> cast)
3320         {
3321                 *this = Extract(cast, 0);
3322         }
3323
3324         Int::Int(RValue<Long> cast)
3325         {
3326                 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3327
3328                 storeValue(integer);
3329         }
3330
3331         Int::Int(RValue<Float> cast)
3332         {
3333                 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3334
3335                 storeValue(integer);
3336         }
3337
3338         Int::Int(int x)
3339         {
3340                 storeValue(Nucleus::createConstantInt(x));
3341         }
3342
3343         Int::Int(RValue<Int> rhs)
3344         {
3345                 storeValue(rhs.value);
3346         }
3347
3348         Int::Int(RValue<UInt> rhs)
3349         {
3350                 storeValue(rhs.value);
3351         }
3352
3353         Int::Int(const Int &rhs)
3354         {
3355                 Value *value = rhs.loadValue();
3356                 storeValue(value);
3357         }
3358
3359         Int::Int(const Reference<Int> &rhs)
3360         {
3361                 Value *value = rhs.loadValue();
3362                 storeValue(value);
3363         }
3364
3365         Int::Int(const UInt &rhs)
3366         {
3367                 Value *value = rhs.loadValue();
3368                 storeValue(value);
3369         }
3370
3371         Int::Int(const Reference<UInt> &rhs)
3372         {
3373                 Value *value = rhs.loadValue();
3374                 storeValue(value);
3375         }
3376
3377         RValue<Int> Int::operator=(int rhs)
3378         {
3379                 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3380         }
3381
3382         RValue<Int> Int::operator=(RValue<Int> rhs)
3383         {
3384                 storeValue(rhs.value);
3385
3386                 return rhs;
3387         }
3388
3389         RValue<Int> Int::operator=(RValue<UInt> rhs)
3390         {
3391                 storeValue(rhs.value);
3392
3393                 return RValue<Int>(rhs);
3394         }
3395
3396         RValue<Int> Int::operator=(const Int &rhs)
3397         {
3398                 Value *value = rhs.loadValue();
3399                 storeValue(value);
3400
3401                 return RValue<Int>(value);
3402         }
3403
3404         RValue<Int> Int::operator=(const Reference<Int> &rhs)
3405         {
3406                 Value *value = rhs.loadValue();
3407                 storeValue(value);
3408
3409                 return RValue<Int>(value);
3410         }
3411
3412         RValue<Int> Int::operator=(const UInt &rhs)
3413         {
3414                 Value *value = rhs.loadValue();
3415                 storeValue(value);
3416
3417                 return RValue<Int>(value);
3418         }
3419
3420         RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3421         {
3422                 Value *value = rhs.loadValue();
3423                 storeValue(value);
3424
3425                 return RValue<Int>(value);
3426         }
3427
3428         RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3429         {
3430                 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3431         }
3432
3433         RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3434         {
3435                 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3436         }
3437
3438         RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3439         {
3440                 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3441         }
3442
3443         RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3444         {
3445                 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3446         }
3447
3448         RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3449         {
3450                 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3451         }
3452
3453         RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3454         {
3455                 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3456         }
3457
3458         RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3459         {
3460                 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3461         }
3462
3463         RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3464         {
3465                 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3466         }
3467
3468         RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3469         {
3470                 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3471         }
3472
3473         RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3474         {
3475                 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3476         }
3477
3478         RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3479         {
3480                 return lhs = lhs + rhs;
3481         }
3482
3483         RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3484         {
3485                 return lhs = lhs - rhs;
3486         }
3487
3488         RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3489         {
3490                 return lhs = lhs * rhs;
3491         }
3492
3493         RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3494         {
3495                 return lhs = lhs / rhs;
3496         }
3497
3498         RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3499         {
3500                 return lhs = lhs % rhs;
3501         }
3502
3503         RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3504         {
3505                 return lhs = lhs & rhs;
3506         }
3507
3508         RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3509         {
3510                 return lhs = lhs | rhs;
3511         }
3512
3513         RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3514         {
3515                 return lhs = lhs ^ rhs;
3516         }
3517
3518         RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3519         {
3520                 return lhs = lhs << rhs;
3521         }
3522
3523         RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3524         {
3525                 return lhs = lhs >> rhs;
3526         }
3527
3528         RValue<Int> operator+(RValue<Int> val)
3529         {
3530                 return val;
3531         }
3532
3533         RValue<Int> operator-(RValue<Int> val)
3534         {
3535                 return RValue<Int>(Nucleus::createNeg(val.value));
3536         }
3537
3538         RValue<Int> operator~(RValue<Int> val)
3539         {
3540                 return RValue<Int>(Nucleus::createNot(val.value));
3541         }
3542
3543         RValue<Int> operator++(Int &val, int)   // Post-increment
3544         {
3545                 RValue<Int> res = val;
3546
3547                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3548                 val.storeValue(inc);
3549
3550                 return res;
3551         }
3552
3553         const Int &operator++(Int &val)   // Pre-increment
3554         {
3555                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3556                 val.storeValue(inc);
3557
3558                 return val;
3559         }
3560
3561         RValue<Int> operator--(Int &val, int)   // Post-decrement
3562         {
3563                 RValue<Int> res = val;
3564
3565                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3566                 val.storeValue(inc);
3567
3568                 return res;
3569         }
3570
3571         const Int &operator--(Int &val)   // Pre-decrement
3572         {
3573                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3574                 val.storeValue(inc);
3575
3576                 return val;
3577         }
3578
3579         RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3580         {
3581                 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
3582         }
3583
3584         RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3585         {
3586                 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
3587         }
3588
3589         RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
3590         {
3591                 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
3592         }
3593
3594         RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
3595         {
3596                 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
3597         }
3598
3599         RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
3600         {
3601                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
3602         }
3603
3604         RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
3605         {
3606                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
3607         }
3608
3609         RValue<Int> Max(RValue<Int> x, RValue<Int> y)
3610         {
3611                 return IfThenElse(x > y, x, y);
3612         }
3613
3614         RValue<Int> Min(RValue<Int> x, RValue<Int> y)
3615         {
3616                 return IfThenElse(x < y, x, y);
3617         }
3618
3619         RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
3620         {
3621                 return Min(Max(x, min), max);
3622         }
3623
3624         RValue<Int> RoundInt(RValue<Float> cast)
3625         {
3626                 return x86::cvtss2si(cast);
3627
3628         //      return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
3629         }
3630
3631         Type *Int::getType()
3632         {
3633                 return T(llvm::Type::getInt32Ty(*::context));
3634         }
3635
3636         Long::Long(RValue<Int> cast)
3637         {
3638                 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
3639
3640                 storeValue(integer);
3641         }
3642
3643         Long::Long(RValue<UInt> cast)
3644         {
3645                 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
3646
3647                 storeValue(integer);
3648         }
3649
3650         Long::Long(RValue<Long> rhs)
3651         {
3652                 storeValue(rhs.value);
3653         }
3654
3655         RValue<Long> Long::operator=(int64_t rhs)
3656         {
3657                 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
3658         }
3659
3660         RValue<Long> Long::operator=(RValue<Long> rhs)
3661         {
3662                 storeValue(rhs.value);
3663
3664                 return rhs;
3665         }
3666
3667         RValue<Long> Long::operator=(const Long &rhs)
3668         {
3669                 Value *value = rhs.loadValue();
3670                 storeValue(value);
3671
3672                 return RValue<Long>(value);
3673         }
3674
3675         RValue<Long> Long::operator=(const Reference<Long> &rhs)
3676         {
3677                 Value *value = rhs.loadValue();
3678                 storeValue(value);
3679
3680                 return RValue<Long>(value);
3681         }
3682
3683         RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
3684         {
3685                 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
3686         }
3687
3688         RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
3689         {
3690                 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
3691         }
3692
3693         RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
3694         {
3695                 return lhs = lhs + rhs;
3696         }
3697
3698         RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
3699         {
3700                 return lhs = lhs - rhs;
3701         }
3702
3703         RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
3704         {
3705                 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
3706         }
3707
3708         Type *Long::getType()
3709         {
3710                 return T(llvm::Type::getInt64Ty(*::context));
3711         }
3712
3713         UInt::UInt(Argument<UInt> argument)
3714         {
3715                 storeValue(argument.value);
3716         }
3717
3718         UInt::UInt(RValue<UShort> cast)
3719         {
3720                 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
3721
3722                 storeValue(integer);
3723         }
3724
3725         UInt::UInt(RValue<Long> cast)
3726         {
3727                 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
3728
3729                 storeValue(integer);
3730         }
3731
3732         UInt::UInt(RValue<Float> cast)
3733         {
3734                 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
3735                 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
3736
3737                 // Smallest positive value representable in UInt, but not in Int
3738                 const unsigned int ustart = 0x80000000u;
3739                 const float ustartf = float(ustart);
3740
3741                 // If the value is negative, store 0, otherwise store the result of the conversion
3742                 storeValue((~(As<Int>(cast) >> 31) &
3743                 // Check if the value can be represented as an Int
3744                         IfThenElse(cast >= ustartf,
3745                 // If the value is too large, subtract ustart and re-add it after conversion.
3746                                 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3747                 // Otherwise, just convert normally
3748                                 Int(cast))).value);
3749         }
3750
3751         UInt::UInt(int x)
3752         {
3753                 storeValue(Nucleus::createConstantInt(x));
3754         }
3755
3756         UInt::UInt(unsigned int x)
3757         {
3758                 storeValue(Nucleus::createConstantInt(x));
3759         }
3760
3761         UInt::UInt(RValue<UInt> rhs)
3762         {
3763                 storeValue(rhs.value);
3764         }
3765
3766         UInt::UInt(RValue<Int> rhs)
3767         {
3768                 storeValue(rhs.value);
3769         }
3770
3771         UInt::UInt(const UInt &rhs)
3772         {
3773                 Value *value = rhs.loadValue();
3774                 storeValue(value);
3775         }
3776
3777         UInt::UInt(const Reference<UInt> &rhs)
3778         {
3779                 Value *value = rhs.loadValue();
3780                 storeValue(value);
3781         }
3782
3783         UInt::UInt(const Int &rhs)
3784         {
3785                 Value *value = rhs.loadValue();
3786                 storeValue(value);
3787         }
3788
3789         UInt::UInt(const Reference<Int> &rhs)
3790         {
3791                 Value *value = rhs.loadValue();
3792                 storeValue(value);
3793         }
3794
3795         RValue<UInt> UInt::operator=(unsigned int rhs)
3796         {
3797                 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
3798         }
3799
3800         RValue<UInt> UInt::operator=(RValue<UInt> rhs)
3801         {
3802                 storeValue(rhs.value);
3803
3804                 return rhs;
3805         }
3806
3807         RValue<UInt> UInt::operator=(RValue<Int> rhs)
3808         {
3809                 storeValue(rhs.value);
3810
3811                 return RValue<UInt>(rhs);
3812         }
3813
3814         RValue<UInt> UInt::operator=(const UInt &rhs)
3815         {
3816                 Value *value = rhs.loadValue();
3817                 storeValue(value);
3818
3819                 return RValue<UInt>(value);
3820         }
3821
3822         RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
3823         {
3824                 Value *value = rhs.loadValue();
3825                 storeValue(value);
3826
3827                 return RValue<UInt>(value);
3828         }
3829
3830         RValue<UInt> UInt::operator=(const Int &rhs)
3831         {
3832                 Value *value = rhs.loadValue();
3833                 storeValue(value);
3834
3835                 return RValue<UInt>(value);
3836         }
3837
3838         RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
3839         {
3840                 Value *value = rhs.loadValue();
3841                 storeValue(value);
3842
3843                 return RValue<UInt>(value);
3844         }
3845
3846         RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
3847         {
3848                 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
3849         }
3850
3851         RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
3852         {
3853                 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
3854         }
3855
3856         RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
3857         {
3858                 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
3859         }
3860
3861         RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
3862         {
3863                 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
3864         }
3865
3866         RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
3867         {
3868                 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
3869         }
3870
3871         RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
3872         {
3873                 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
3874         }
3875
3876         RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
3877         {
3878                 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
3879         }
3880
3881         RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
3882         {
3883                 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
3884         }
3885
3886         RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
3887         {
3888                 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
3889         }
3890
3891         RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
3892         {
3893                 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
3894         }
3895
3896         RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
3897         {
3898                 return lhs = lhs + rhs;
3899         }
3900
3901         RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
3902         {
3903                 return lhs = lhs - rhs;
3904         }
3905
3906         RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
3907         {
3908                 return lhs = lhs * rhs;
3909         }
3910
3911         RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
3912         {
3913                 return lhs = lhs / rhs;
3914         }
3915
3916         RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
3917         {
3918                 return lhs = lhs % rhs;
3919         }
3920
3921         RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
3922         {
3923                 return lhs = lhs & rhs;
3924         }
3925
3926         RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
3927         {
3928                 return lhs = lhs | rhs;
3929         }
3930
3931         RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
3932         {
3933                 return lhs = lhs ^ rhs;
3934         }
3935
3936         RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
3937         {
3938                 return lhs = lhs << rhs;
3939         }
3940
3941         RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
3942         {
3943                 return lhs = lhs >> rhs;
3944         }
3945
3946         RValue<UInt> operator+(RValue<UInt> val)
3947         {
3948                 return val;
3949         }
3950
3951         RValue<UInt> operator-(RValue<UInt> val)
3952         {
3953                 return RValue<UInt>(Nucleus::createNeg(val.value));
3954         }
3955
3956         RValue<UInt> operator~(RValue<UInt> val)
3957         {
3958                 return RValue<UInt>(Nucleus::createNot(val.value));
3959         }
3960
3961         RValue<UInt> operator++(UInt &val, int)   // Post-increment
3962         {
3963                 RValue<UInt> res = val;
3964
3965                 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3966                 val.storeValue(inc);
3967
3968                 return res;
3969         }
3970
3971         const UInt &operator++(UInt &val)   // Pre-increment
3972         {
3973                 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3974                 val.storeValue(inc);
3975
3976                 return val;
3977         }
3978
3979         RValue<UInt> operator--(UInt &val, int)   // Post-decrement
3980         {
3981                 RValue<UInt> res = val;
3982
3983                 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3984                 val.storeValue(inc);
3985
3986                 return res;
3987         }
3988
3989         const UInt &operator--(UInt &val)   // Pre-decrement
3990         {
3991                 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3992                 val.storeValue(inc);
3993
3994                 return val;
3995         }
3996
3997         RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
3998         {
3999                 return IfThenElse(x > y, x, y);
4000         }
4001
4002         RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4003         {
4004                 return IfThenElse(x < y, x, y);
4005         }
4006
4007         RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4008         {
4009                 return Min(Max(x, min), max);
4010         }
4011
4012         RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4013         {
4014                 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4015         }
4016
4017         RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4018         {
4019                 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4020         }
4021
4022         RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4023         {
4024                 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4025         }
4026
4027         RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4028         {
4029                 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4030         }
4031
4032         RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4033         {
4034                 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4035         }
4036
4037         RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4038         {
4039                 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4040         }
4041
4042 //      RValue<UInt> RoundUInt(RValue<Float> cast)
4043 //      {
4044 //              return x86::cvtss2si(val);   // FIXME: Unsigned
4045 //
4046 //      //      return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4047 //      }
4048
4049         Type *UInt::getType()
4050         {
4051                 return T(llvm::Type::getInt32Ty(*::context));
4052         }
4053
4054 //      Int2::Int2(RValue<Int> cast)
4055 //      {
4056 //              Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4057 //              Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4058 //
4059 //              int shuffle[2] = {0, 0};
4060 //              Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4061 //
4062 //              storeValue(replicate);
4063 //      }
4064
4065         Int2::Int2(RValue<Int4> cast)
4066         {
4067                 storeValue(Nucleus::createBitCast(cast.value, getType()));
4068         }
4069
4070         Int2::Int2(int x, int y)
4071         {
4072                 int64_t constantVector[2] = {x, y};
4073                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4074         }
4075
4076         Int2::Int2(RValue<Int2> rhs)
4077         {
4078                 storeValue(rhs.value);
4079         }
4080
4081         Int2::Int2(const Int2 &rhs)
4082         {
4083                 Value *value = rhs.loadValue();
4084                 storeValue(value);
4085         }
4086
4087         Int2::Int2(const Reference<Int2> &rhs)
4088         {
4089                 Value *value = rhs.loadValue();
4090                 storeValue(value);
4091         }
4092
4093         Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4094         {
4095                 int shuffle[4] = {0, 4, 1, 5};
4096                 Value *packed = Nucleus::createShuffleVector(Int4(lo).loadValue(), Int4(hi).loadValue(), shuffle);
4097
4098                 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4099         }
4100
4101         RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4102         {
4103                 storeValue(rhs.value);
4104
4105                 return rhs;
4106         }
4107
4108         RValue<Int2> Int2::operator=(const Int2 &rhs)
4109         {
4110                 Value *value = rhs.loadValue();
4111                 storeValue(value);
4112
4113                 return RValue<Int2>(value);
4114         }
4115
4116         RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4117         {
4118                 Value *value = rhs.loadValue();
4119                 storeValue(value);
4120
4121                 return RValue<Int2>(value);
4122         }
4123
4124         RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4125         {
4126                 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4127         }
4128
4129         RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4130         {
4131                 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4132         }
4133
4134 //      RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4135 //      {
4136 //              return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4137 //      }
4138
4139 //      RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4140 //      {
4141 //              return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4142 //      }
4143
4144 //      RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4145 //      {
4146 //              return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4147 //      }
4148
4149         RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4150         {
4151                 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4152         }
4153
4154         RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4155         {
4156                 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4157         }
4158
4159         RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4160         {
4161                 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4162         }
4163
4164         RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4165         {
4166         //      return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4167
4168                 return x86::pslld(lhs, rhs);
4169         }
4170
4171         RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4172         {
4173         //      return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4174
4175                 return x86::psrad(lhs, rhs);
4176         }
4177
4178         RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4179         {
4180                 return lhs = lhs + rhs;
4181         }
4182
4183         RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4184         {
4185                 return lhs = lhs - rhs;
4186         }
4187
4188 //      RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4189 //      {
4190 //              return lhs = lhs * rhs;
4191 //      }
4192
4193 //      RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4194 //      {
4195 //              return lhs = lhs / rhs;
4196 //      }
4197
4198 //      RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4199 //      {
4200 //              return lhs = lhs % rhs;
4201 //      }
4202
4203         RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4204         {
4205                 return lhs = lhs & rhs;
4206         }
4207
4208         RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4209         {
4210                 return lhs = lhs | rhs;
4211         }
4212
4213         RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4214         {
4215                 return lhs = lhs ^ rhs;
4216         }
4217
4218         RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4219         {
4220                 return lhs = lhs << rhs;
4221         }
4222
4223         RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4224         {
4225                 return lhs = lhs >> rhs;
4226         }
4227
4228 //      RValue<Int2> operator+(RValue<Int2> val)
4229 //      {
4230 //              return val;
4231 //      }
4232
4233 //      RValue<Int2> operator-(RValue<Int2> val)
4234 //      {
4235 //              return RValue<Int2>(Nucleus::createNeg(val.value));
4236 //      }
4237
4238         RValue<Int2> operator~(RValue<Int2> val)
4239         {
4240                 return RValue<Int2>(Nucleus::createNot(val.value));
4241         }
4242
4243         RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4244         {
4245                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4246                 return As<Short4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4247         }
4248
4249         RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4250         {
4251                 int shuffle[4] = {0, 4, 1, 5};   // Real type is v4i32
4252                 auto lowHigh = RValue<Int4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
4253                 return As<Short4>(Swizzle(lowHigh, 0xEE));
4254         }
4255
4256         RValue<Int> Extract(RValue<Int2> val, int i)
4257         {
4258                 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4259         }
4260
4261         RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4262         {
4263                 return RValue<Int2>(Nucleus::createInsertElement(val.value, element.value, i));
4264         }
4265
4266         Type *Int2::getType()
4267         {
4268                 return T(Type_v2i32);
4269         }
4270
4271         UInt2::UInt2(unsigned int x, unsigned int y)
4272         {
4273                 int64_t constantVector[2] = {x, y};
4274                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4275         }
4276
4277         UInt2::UInt2(RValue<UInt2> rhs)
4278         {
4279                 storeValue(rhs.value);
4280         }
4281
4282         UInt2::UInt2(const UInt2 &rhs)
4283         {
4284                 Value *value = rhs.loadValue();
4285                 storeValue(value);
4286         }
4287
4288         UInt2::UInt2(const Reference<UInt2> &rhs)
4289         {
4290                 Value *value = rhs.loadValue();
4291                 storeValue(value);
4292         }
4293
4294         RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4295         {
4296                 storeValue(rhs.value);
4297
4298                 return rhs;
4299         }
4300
4301         RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4302         {
4303                 Value *value = rhs.loadValue();
4304                 storeValue(value);
4305
4306                 return RValue<UInt2>(value);
4307         }
4308
4309         RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4310         {
4311                 Value *value = rhs.loadValue();
4312                 storeValue(value);
4313
4314                 return RValue<UInt2>(value);
4315         }
4316
4317         RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4318         {
4319                 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
4320         }
4321
4322         RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4323         {
4324                 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4325         }
4326
4327 //      RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4328 //      {
4329 //              return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4330 //      }
4331
4332 //      RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4333 //      {
4334 //              return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4335 //      }
4336
4337 //      RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4338 //      {
4339 //              return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
4340 //      }
4341
4342         RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4343         {
4344                 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
4345         }
4346
4347         RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4348         {
4349                 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
4350         }
4351
4352         RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4353         {
4354                 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
4355         }
4356
4357         RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4358         {
4359         //      return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4360
4361                 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
4362         }
4363
4364         RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4365         {
4366         //      return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4367
4368                 return x86::psrld(lhs, rhs);
4369         }
4370
4371         RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4372         {
4373                 return lhs = lhs + rhs;
4374         }
4375
4376         RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4377         {
4378                 return lhs = lhs - rhs;
4379         }
4380
4381 //      RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4382 //      {
4383 //              return lhs = lhs * rhs;
4384 //      }
4385
4386 //      RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4387 //      {
4388 //              return lhs = lhs / rhs;
4389 //      }
4390
4391 //      RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4392 //      {
4393 //              return lhs = lhs % rhs;
4394 //      }
4395
4396         RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4397         {
4398                 return lhs = lhs & rhs;
4399         }
4400
4401         RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4402         {
4403                 return lhs = lhs | rhs;
4404         }
4405
4406         RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4407         {
4408                 return lhs = lhs ^ rhs;
4409         }
4410
4411         RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4412         {
4413                 return lhs = lhs << rhs;
4414         }
4415
4416         RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4417         {
4418                 return lhs = lhs >> rhs;
4419         }
4420
4421 //      RValue<UInt2> operator+(RValue<UInt2> val)
4422 //      {
4423 //              return val;
4424 //      }
4425
4426 //      RValue<UInt2> operator-(RValue<UInt2> val)
4427 //      {
4428 //              return RValue<UInt2>(Nucleus::createNeg(val.value));
4429 //      }
4430
4431         RValue<UInt2> operator~(RValue<UInt2> val)
4432         {
4433                 return RValue<UInt2>(Nucleus::createNot(val.value));
4434         }
4435
4436         Type *UInt2::getType()
4437         {
4438                 return T(Type_v2i32);
4439         }
4440
4441         Int4::Int4(RValue<Byte4> cast)
4442         {
4443                 if(CPUID::supportsSSE4_1())
4444                 {
4445                         *this = x86::pmovzxbd(As<Byte16>(cast));
4446                 }
4447                 else
4448                 {
4449                         int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4450                         Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
4451                         Value *b = Nucleus::createShuffleVector(a, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4452
4453                         int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4454                         Value *c = Nucleus::createBitCast(b, Short8::getType());
4455                         Value *d = Nucleus::createShuffleVector(c, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4456
4457                         *this = As<Int4>(d);
4458                 }
4459         }
4460
4461         Int4::Int4(RValue<SByte4> cast)
4462         {
4463                 if(CPUID::supportsSSE4_1())
4464                 {
4465                         *this = x86::pmovsxbd(As<SByte16>(cast));
4466                 }
4467                 else
4468                 {
4469                         int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4470                         Value *a = Nucleus::createBitCast(cast.value, Byte16::getType());
4471                         Value *b = Nucleus::createShuffleVector(a, a, swizzle);
4472
4473                         int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4474                         Value *c = Nucleus::createBitCast(b, Short8::getType());
4475                         Value *d = Nucleus::createShuffleVector(c, c, swizzle2);
4476
4477                         *this = As<Int4>(d) >> 24;
4478                 }
4479         }
4480
4481         Int4::Int4(RValue<Float4> cast)
4482         {
4483                 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
4484
4485                 storeValue(xyzw);
4486         }
4487
4488         Int4::Int4(RValue<Short4> cast)
4489         {
4490                 if(CPUID::supportsSSE4_1())
4491                 {
4492                         *this = x86::pmovsxwd(As<Short8>(cast));
4493                 }
4494                 else
4495                 {
4496                         int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4497                         Value *c = Nucleus::createShuffleVector(cast.value, cast.value, swizzle);
4498                         *this = As<Int4>(c) >> 16;
4499                 }
4500         }
4501
4502         Int4::Int4(RValue<UShort4> cast)
4503         {
4504                 if(CPUID::supportsSSE4_1())
4505                 {
4506                         *this = x86::pmovzxwd(As<UShort8>(cast));
4507                 }
4508                 else
4509                 {
4510                         int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4511                         Value *c = Nucleus::createShuffleVector(cast.value, Short8(0, 0, 0, 0, 0, 0, 0, 0).loadValue(), swizzle);
4512                         *this = As<Int4>(c);
4513                 }
4514         }
4515
4516         Int4::Int4(int xyzw)
4517         {
4518                 constant(xyzw, xyzw, xyzw, xyzw);
4519         }
4520
4521         Int4::Int4(int x, int yzw)
4522         {
4523                 constant(x, yzw, yzw, yzw);
4524         }
4525
4526         Int4::Int4(int x, int y, int zw)
4527         {
4528                 constant(x, y, zw, zw);
4529         }
4530
4531         Int4::Int4(int x, int y, int z, int w)
4532         {
4533                 constant(x, y, z, w);
4534         }
4535
4536         void Int4::constant(int x, int y, int z, int w)
4537         {
4538                 int64_t constantVector[4] = {x, y, z, w};
4539                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4540         }
4541
4542         Int4::Int4(RValue<Int4> rhs)
4543         {
4544                 storeValue(rhs.value);
4545         }
4546
4547         Int4::Int4(const Int4 &rhs)
4548         {
4549                 Value *value = rhs.loadValue();
4550                 storeValue(value);
4551         }
4552
4553         Int4::Int4(const Reference<Int4> &rhs)
4554         {
4555                 Value *value = rhs.loadValue();
4556                 storeValue(value);
4557         }
4558
4559         Int4::Int4(RValue<UInt4> rhs)
4560         {
4561                 storeValue(rhs.value);
4562         }
4563
4564         Int4::Int4(const UInt4 &rhs)
4565         {
4566                 Value *value = rhs.loadValue();
4567                 storeValue(value);
4568         }
4569
4570         Int4::Int4(const Reference<UInt4> &rhs)
4571         {
4572                 Value *value = rhs.loadValue();
4573                 storeValue(value);
4574         }
4575
4576         Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
4577         {
4578                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
4579                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4580
4581                 storeValue(packed);
4582         }
4583
4584         Int4::Int4(RValue<Int> rhs)
4585         {
4586                 Value *vector = loadValue();
4587                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
4588
4589                 int swizzle[4] = {0, 0, 0, 0};
4590                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
4591
4592                 storeValue(replicate);
4593         }
4594
4595         Int4::Int4(const Int &rhs)
4596         {
4597                 *this = RValue<Int>(rhs.loadValue());
4598         }
4599
4600         Int4::Int4(const Reference<Int> &rhs)
4601         {
4602                 *this = RValue<Int>(rhs.loadValue());
4603         }
4604
4605         RValue<Int4> Int4::operator=(RValue<Int4> rhs)
4606         {
4607                 storeValue(rhs.value);
4608
4609                 return rhs;
4610         }
4611
4612         RValue<Int4> Int4::operator=(const Int4 &rhs)
4613         {
4614                 Value *value = rhs.loadValue();
4615                 storeValue(value);
4616
4617                 return RValue<Int4>(value);
4618         }
4619
4620         RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
4621         {
4622                 Value *value = rhs.loadValue();
4623                 storeValue(value);
4624
4625                 return RValue<Int4>(value);
4626         }
4627
4628         RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
4629         {
4630                 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
4631         }
4632
4633         RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
4634         {
4635                 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
4636         }
4637
4638         RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
4639         {
4640                 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
4641         }
4642
4643         RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
4644         {
4645                 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
4646         }
4647
4648         RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
4649         {
4650                 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
4651         }
4652
4653         RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
4654         {
4655                 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
4656         }
4657
4658         RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
4659         {
4660                 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
4661         }
4662
4663         RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
4664         {
4665                 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
4666         }
4667
4668         RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
4669         {
4670                 return x86::pslld(lhs, rhs);
4671         }
4672
4673         RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
4674         {
4675                 return x86::psrad(lhs, rhs);
4676         }
4677
4678         RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
4679         {
4680                 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
4681         }
4682
4683         RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
4684         {
4685                 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
4686         }
4687
4688         RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
4689         {
4690                 return lhs = lhs + rhs;
4691         }
4692
4693         RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
4694         {
4695                 return lhs = lhs - rhs;
4696         }
4697
4698         RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
4699         {
4700                 return lhs = lhs * rhs;
4701         }
4702
4703 //      RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
4704 //      {
4705 //              return lhs = lhs / rhs;
4706 //      }
4707
4708 //      RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
4709 //      {
4710 //              return lhs = lhs % rhs;
4711 //      }
4712
4713         RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
4714         {
4715                 return lhs = lhs & rhs;
4716         }
4717
4718         RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
4719         {
4720                 return lhs = lhs | rhs;
4721         }
4722
4723         RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
4724         {
4725                 return lhs = lhs ^ rhs;
4726         }
4727
4728         RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
4729         {
4730                 return lhs = lhs << rhs;
4731         }
4732
4733         RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
4734         {
4735                 return lhs = lhs >> rhs;
4736         }
4737
4738         RValue<Int4> operator+(RValue<Int4> val)
4739         {
4740                 return val;
4741         }
4742
4743         RValue<Int4> operator-(RValue<Int4> val)
4744         {
4745                 return RValue<Int4>(Nucleus::createNeg(val.value));
4746         }
4747
4748         RValue<Int4> operator~(RValue<Int4> val)
4749         {
4750                 return RValue<Int4>(Nucleus::createNot(val.value));
4751         }
4752
4753         RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
4754         {
4755                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
4756                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
4757                 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
4758                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
4759         }
4760
4761         RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
4762         {
4763                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
4764         }
4765
4766         RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
4767         {
4768                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
4769                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
4770                 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
4771                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
4772         }
4773
4774         RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
4775         {
4776                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
4777         }
4778
4779         RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
4780         {
4781                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
4782                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
4783                 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
4784                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
4785         }
4786
4787         RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
4788         {
4789                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
4790         }
4791
4792         RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
4793         {
4794                 if(CPUID::supportsSSE4_1())
4795                 {
4796                         return x86::pmaxsd(x, y);
4797                 }
4798                 else
4799                 {
4800                         RValue<Int4> greater = CmpNLE(x, y);
4801                         return (x & greater) | (y & ~greater);
4802                 }
4803         }
4804
4805         RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
4806         {
4807                 if(CPUID::supportsSSE4_1())
4808                 {
4809                         return x86::pminsd(x, y);
4810                 }
4811                 else
4812                 {
4813                         RValue<Int4> less = CmpLT(x, y);
4814                         return (x & less) | (y & ~less);
4815                 }
4816         }
4817
4818         RValue<Int4> RoundInt(RValue<Float4> cast)
4819         {
4820                 return x86::cvtps2dq(cast);
4821         }
4822
4823         RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
4824         {
4825                 return x86::packssdw(x, y);
4826         }
4827
4828         RValue<Int> Extract(RValue<Int4> x, int i)
4829         {
4830                 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
4831         }
4832
4833         RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
4834         {
4835                 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
4836         }
4837
4838         RValue<Int> SignMask(RValue<Int4> x)
4839         {
4840                 return x86::movmskps(As<Float4>(x));
4841         }
4842
4843         RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
4844         {
4845                 return RValue<Int4>(createSwizzle4(x.value, select));
4846         }
4847
4848         Type *Int4::getType()
4849         {
4850                 return T(llvm::VectorType::get(T(Int::getType()), 4));
4851         }
4852
4853         UInt4::UInt4(RValue<Float4> cast)
4854         {
4855                 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
4856                 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
4857
4858                 // Smallest positive value representable in UInt, but not in Int
4859                 const unsigned int ustart = 0x80000000u;
4860                 const float ustartf = float(ustart);
4861
4862                 // Check if the value can be represented as an Int
4863                 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
4864                 // If the value is too large, subtract ustart and re-add it after conversion.
4865                 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
4866                 // Otherwise, just convert normally
4867                           (~uiValue & Int4(cast));
4868                 // If the value is negative, store 0, otherwise store the result of the conversion
4869                 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
4870         }
4871
4872         UInt4::UInt4(int xyzw)
4873         {
4874                 constant(xyzw, xyzw, xyzw, xyzw);
4875         }
4876
4877         UInt4::UInt4(int x, int yzw)
4878         {
4879                 constant(x, yzw, yzw, yzw);
4880         }
4881
4882         UInt4::UInt4(int x, int y, int zw)
4883         {
4884                 constant(x, y, zw, zw);
4885         }
4886
4887         UInt4::UInt4(int x, int y, int z, int w)
4888         {
4889                 constant(x, y, z, w);
4890         }
4891
4892         void UInt4::constant(int x, int y, int z, int w)
4893         {
4894                 int64_t constantVector[4] = {x, y, z, w};
4895                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
4896         }
4897
4898         UInt4::UInt4(RValue<UInt4> rhs)
4899         {
4900                 storeValue(rhs.value);
4901         }
4902
4903         UInt4::UInt4(const UInt4 &rhs)
4904         {
4905                 Value *value = rhs.loadValue();
4906                 storeValue(value);
4907         }
4908
4909         UInt4::UInt4(const Reference<UInt4> &rhs)
4910         {
4911                 Value *value = rhs.loadValue();
4912                 storeValue(value);
4913         }
4914
4915         UInt4::UInt4(RValue<Int4> rhs)
4916         {
4917                 storeValue(rhs.value);
4918         }
4919
4920         UInt4::UInt4(const Int4 &rhs)
4921         {
4922                 Value *value = rhs.loadValue();
4923                 storeValue(value);
4924         }
4925
4926         UInt4::UInt4(const Reference<Int4> &rhs)
4927         {
4928                 Value *value = rhs.loadValue();
4929                 storeValue(value);
4930         }
4931
4932         UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
4933         {
4934                 int shuffle[4] = {0, 1, 4, 5};   // Real type is v4i32
4935                 Value *packed = Nucleus::createShuffleVector(lo.value, hi.value, shuffle);
4936
4937                 storeValue(packed);
4938         }
4939
4940         RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
4941         {
4942                 storeValue(rhs.value);
4943
4944                 return rhs;
4945         }
4946
4947         RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
4948         {
4949                 Value *value = rhs.loadValue();
4950                 storeValue(value);
4951
4952                 return RValue<UInt4>(value);
4953         }
4954
4955         RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
4956         {
4957                 Value *value = rhs.loadValue();
4958                 storeValue(value);
4959
4960                 return RValue<UInt4>(value);
4961         }
4962
4963         RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
4964         {
4965                 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
4966         }
4967
4968         RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
4969         {
4970                 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
4971         }
4972
4973         RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
4974         {
4975                 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
4976         }
4977
4978         RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
4979         {
4980                 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
4981         }
4982
4983         RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
4984         {
4985                 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
4986         }
4987
4988         RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
4989         {
4990                 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
4991         }
4992
4993         RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
4994         {
4995                 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
4996         }
4997
4998         RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
4999         {
5000                 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
5001         }
5002
5003         RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5004         {
5005                 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5006         }
5007
5008         RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5009         {
5010                 return x86::psrld(lhs, rhs);
5011         }
5012
5013         RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5014         {
5015                 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5016         }
5017
5018         RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5019         {
5020                 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
5021         }
5022
5023         RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5024         {
5025                 return lhs = lhs + rhs;
5026         }
5027
5028         RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5029         {
5030                 return lhs = lhs - rhs;
5031         }
5032
5033         RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5034         {
5035                 return lhs = lhs * rhs;
5036         }
5037
5038 //      RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5039 //      {
5040 //              return lhs = lhs / rhs;
5041 //      }
5042
5043 //      RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5044 //      {
5045 //              return lhs = lhs % rhs;
5046 //      }
5047
5048         RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5049         {
5050                 return lhs = lhs & rhs;
5051         }
5052
5053         RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5054         {
5055                 return lhs = lhs | rhs;
5056         }
5057
5058         RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5059         {
5060                 return lhs = lhs ^ rhs;
5061         }
5062
5063         RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5064         {
5065                 return lhs = lhs << rhs;
5066         }
5067
5068         RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5069         {
5070                 return lhs = lhs >> rhs;
5071         }
5072
5073         RValue<UInt4> operator+(RValue<UInt4> val)
5074         {
5075                 return val;
5076         }
5077
5078         RValue<UInt4> operator-(RValue<UInt4> val)
5079         {
5080                 return RValue<UInt4>(Nucleus::createNeg(val.value));
5081         }
5082
5083         RValue<UInt4> operator~(RValue<UInt4> val)
5084         {
5085                 return RValue<UInt4>(Nucleus::createNot(val.value));
5086         }
5087
5088         RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5089         {
5090                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5091                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
5092                 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5093                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5094         }
5095
5096         RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5097         {
5098                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
5099         }
5100
5101         RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5102         {
5103                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5104                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
5105                 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5106                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5107         }
5108
5109         RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5110         {
5111                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
5112         }
5113
5114         RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5115         {
5116                 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5117                 //        Restore the following line when LLVM is updated to a version where this issue is fixed.
5118                 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5119                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
5120         }
5121
5122         RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5123         {
5124                 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
5125         }
5126
5127         RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5128         {
5129                 if(CPUID::supportsSSE4_1())
5130                 {
5131                         return x86::pmaxud(x, y);
5132                 }
5133                 else
5134                 {
5135                         RValue<UInt4> greater = CmpNLE(x, y);
5136                         return (x & greater) | (y & ~greater);
5137                 }
5138         }
5139
5140         RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5141         {
5142                 if(CPUID::supportsSSE4_1())
5143                 {
5144                         return x86::pminud(x, y);
5145                 }
5146                 else
5147                 {
5148                         RValue<UInt4> less = CmpLT(x, y);
5149                         return (x & less) | (y & ~less);
5150                 }
5151         }
5152
5153         RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5154         {
5155                 return x86::packusdw(As<Int4>(x), As<Int4>(y));
5156         }
5157
5158         Type *UInt4::getType()
5159         {
5160                 return T(llvm::VectorType::get(T(UInt::getType()), 4));
5161         }
5162
5163         Float::Float(RValue<Int> cast)
5164         {
5165                 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5166
5167                 storeValue(integer);
5168         }
5169
5170         Float::Float(RValue<UInt> cast)
5171         {
5172                 RValue<Float> result = Float(Int(cast & UInt(0x7FFFFFFF))) +
5173                                        As<Float>((As<Int>(cast) >> 31) & As<Int>(Float(0x80000000u)));
5174
5175                 storeValue(result.value);
5176         }
5177
5178         Float::Float(float x)
5179         {
5180                 storeValue(Nucleus::createConstantFloat(x));
5181         }
5182
5183         Float::Float(RValue<Float> rhs)
5184         {
5185                 storeValue(rhs.value);
5186         }
5187
5188         Float::Float(const Float &rhs)
5189         {
5190                 Value *value = rhs.loadValue();
5191                 storeValue(value);
5192         }
5193
5194         Float::Float(const Reference<Float> &rhs)
5195         {
5196                 Value *value = rhs.loadValue();
5197                 storeValue(value);
5198         }
5199
5200         RValue<Float> Float::operator=(RValue<Float> rhs)
5201         {
5202                 storeValue(rhs.value);
5203
5204                 return rhs;
5205         }
5206
5207         RValue<Float> Float::operator=(const Float &rhs)
5208         {
5209                 Value *value = rhs.loadValue();
5210                 storeValue(value);
5211
5212                 return RValue<Float>(value);
5213         }
5214
5215         RValue<Float> Float::operator=(const Reference<Float> &rhs)
5216         {
5217                 Value *value = rhs.loadValue();
5218                 storeValue(value);
5219
5220                 return RValue<Float>(value);
5221         }
5222
5223         RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5224         {
5225                 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5226         }
5227
5228         RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5229         {
5230                 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5231         }
5232
5233         RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5234         {
5235                 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5236         }
5237
5238         RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5239         {
5240                 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
5241         }
5242
5243         RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5244         {
5245                 return lhs = lhs + rhs;
5246         }
5247
5248         RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5249         {
5250                 return lhs = lhs - rhs;
5251         }
5252
5253         RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5254         {
5255                 return lhs = lhs * rhs;
5256         }
5257
5258         RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5259         {
5260                 return lhs = lhs / rhs;
5261         }
5262
5263         RValue<Float> operator+(RValue<Float> val)
5264         {
5265                 return val;
5266         }
5267
5268         RValue<Float> operator-(RValue<Float> val)
5269         {
5270                 return RValue<Float>(Nucleus::createFNeg(val.value));
5271         }
5272
5273         RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5274         {
5275                 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5276         }
5277
5278         RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5279         {
5280                 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5281         }
5282
5283         RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5284         {
5285                 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5286         }
5287
5288         RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5289         {
5290                 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5291         }
5292
5293         RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5294         {
5295                 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5296         }
5297
5298         RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5299         {
5300                 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
5301         }
5302
5303         RValue<Float> Abs(RValue<Float> x)
5304         {
5305                 return IfThenElse(x > 0.0f, x, -x);
5306         }
5307
5308         RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5309         {
5310                 return IfThenElse(x > y, x, y);
5311         }
5312
5313         RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5314         {
5315                 return IfThenElse(x < y, x, y);
5316         }
5317
5318         RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5319         {
5320                 #if defined(__i386__) || defined(__x86_64__)
5321                         if(exactAtPow2)
5322                         {
5323                                 // rcpss uses a piecewise-linear approximation which minimizes the relative error
5324                                 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
5325                                 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5326                         }
5327                 #endif
5328
5329                 return x86::rcpss(x);
5330         }
5331
5332         RValue<Float> RcpSqrt_pp(RValue<Float> x)
5333         {
5334                 return x86::rsqrtss(x);
5335         }
5336
5337         RValue<Float> Sqrt(RValue<Float> x)
5338         {
5339                 return x86::sqrtss(x);
5340         }
5341
5342         RValue<Float> Round(RValue<Float> x)
5343         {
5344                 if(CPUID::supportsSSE4_1())
5345                 {
5346                         return x86::roundss(x, 0);
5347                 }
5348                 else
5349                 {
5350                         return Float4(Round(Float4(x))).x;
5351                 }
5352         }
5353
5354         RValue<Float> Trunc(RValue<Float> x)
5355         {
5356                 if(CPUID::supportsSSE4_1())
5357                 {
5358                         return x86::roundss(x, 3);
5359                 }
5360                 else
5361                 {
5362                         return Float(Int(x));   // Rounded toward zero
5363                 }
5364         }
5365
5366         RValue<Float> Frac(RValue<Float> x)
5367         {
5368                 if(CPUID::supportsSSE4_1())
5369                 {
5370                         return x - x86::floorss(x);
5371                 }
5372                 else
5373                 {
5374                         return Float4(Frac(Float4(x))).x;
5375                 }
5376         }
5377
5378         RValue<Float> Floor(RValue<Float> x)
5379         {
5380                 if(CPUID::supportsSSE4_1())
5381                 {
5382                         return x86::floorss(x);
5383                 }
5384                 else
5385                 {
5386                         return Float4(Floor(Float4(x))).x;
5387                 }
5388         }
5389
5390         RValue<Float> Ceil(RValue<Float> x)
5391         {
5392                 if(CPUID::supportsSSE4_1())
5393                 {
5394                         return x86::ceilss(x);
5395                 }
5396                 else
5397                 {
5398                         return Float4(Ceil(Float4(x))).x;
5399                 }
5400         }
5401
5402         Type *Float::getType()
5403         {
5404                 return T(llvm::Type::getFloatTy(*::context));
5405         }
5406
5407         Float2::Float2(RValue<Float4> cast)
5408         {
5409                 storeValue(Nucleus::createBitCast(cast.value, getType()));
5410         }
5411
5412         Type *Float2::getType()
5413         {
5414                 return T(Type_v2f32);
5415         }
5416
5417         Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5418         {
5419                 Value *a = Int4(cast).loadValue();
5420                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5421
5422                 storeValue(xyzw);
5423         }
5424
5425         Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5426         {
5427                 Value *a = Int4(cast).loadValue();
5428                 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
5429
5430                 storeValue(xyzw);
5431         }
5432
5433         Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5434         {
5435                 Int4 c(cast);
5436                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5437         }
5438
5439         Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5440         {
5441                 Int4 c(cast);
5442                 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
5443         }
5444
5445         Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5446         {
5447                 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
5448
5449                 storeValue(xyzw);
5450         }
5451
5452         Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5453         {
5454                 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5455                                         As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5456
5457                 storeValue(result.value);
5458         }
5459
5460         Float4::Float4() : FloatXYZW(this)
5461         {
5462         }
5463
5464         Float4::Float4(float xyzw) : FloatXYZW(this)
5465         {
5466                 constant(xyzw, xyzw, xyzw, xyzw);
5467         }
5468
5469         Float4::Float4(float x, float yzw) : FloatXYZW(this)
5470         {
5471                 constant(x, yzw, yzw, yzw);
5472         }
5473
5474         Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5475         {
5476                 constant(x, y, zw, zw);
5477         }
5478
5479         Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5480         {
5481                 constant(x, y, z, w);
5482         }
5483
5484         void Float4::constant(float x, float y, float z, float w)
5485         {
5486                 double constantVector[4] = {x, y, z, w};
5487                 storeValue(Nucleus::createConstantVector(constantVector, getType()));
5488         }
5489
5490         Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5491         {
5492                 storeValue(rhs.value);
5493         }
5494
5495         Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5496         {
5497                 Value *value = rhs.loadValue();
5498                 storeValue(value);
5499         }
5500
5501         Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5502         {
5503                 Value *value = rhs.loadValue();
5504                 storeValue(value);
5505         }
5506
5507         Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5508         {
5509                 Value *vector = loadValue();
5510                 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5511
5512                 int swizzle[4] = {0, 0, 0, 0};
5513                 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5514
5515                 storeValue(replicate);
5516         }
5517
5518         Float4::Float4(const Float &rhs) : FloatXYZW(this)
5519         {
5520                 *this = RValue<Float>(rhs.loadValue());
5521         }
5522
5523         Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5524         {
5525                 *this = RValue<Float>(rhs.loadValue());
5526         }
5527
5528         RValue<Float4> Float4::operator=(float x)
5529         {
5530                 return *this = Float4(x, x, x, x);
5531         }
5532
5533         RValue<Float4> Float4::operator=(RValue<Float4> rhs)
5534         {
5535                 storeValue(rhs.value);
5536
5537                 return rhs;
5538         }
5539
5540         RValue<Float4> Float4::operator=(const Float4 &rhs)
5541         {
5542                 Value *value = rhs.loadValue();
5543                 storeValue(value);
5544
5545                 return RValue<Float4>(value);
5546         }
5547
5548         RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
5549         {
5550                 Value *value = rhs.loadValue();
5551                 storeValue(value);
5552
5553                 return RValue<Float4>(value);
5554         }
5555
5556         RValue<Float4> Float4::operator=(RValue<Float> rhs)
5557         {
5558                 return *this = Float4(rhs);
5559         }
5560
5561         RValue<Float4> Float4::operator=(const Float &rhs)
5562         {
5563                 return *this = Float4(rhs);
5564         }
5565
5566         RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
5567         {
5568                 return *this = Float4(rhs);
5569         }
5570
5571         RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
5572         {
5573                 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
5574         }
5575
5576         RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
5577         {
5578                 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
5579         }
5580
5581         RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
5582         {
5583                 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
5584         }
5585
5586         RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
5587         {
5588                 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
5589         }
5590
5591         RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
5592         {
5593                 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
5594         }
5595
5596         RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
5597         {
5598                 return lhs = lhs + rhs;
5599         }
5600
5601         RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
5602         {
5603                 return lhs = lhs - rhs;
5604         }
5605
5606         RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
5607         {
5608                 return lhs = lhs * rhs;
5609         }
5610
5611         RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
5612         {
5613                 return lhs = lhs / rhs;
5614         }
5615
5616         RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
5617         {
5618                 return lhs = lhs % rhs;
5619         }
5620
5621         RValue<Float4> operator+(RValue<Float4> val)
5622         {
5623                 return val;
5624         }
5625
5626         RValue<Float4> operator-(RValue<Float4> val)
5627         {
5628                 return RValue<Float4>(Nucleus::createFNeg(val.value));
5629         }
5630
5631         RValue<Float4> Abs(RValue<Float4> x)
5632         {
5633                 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
5634                 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
5635                 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
5636
5637                 return As<Float4>(result);
5638         }
5639
5640         RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
5641         {
5642                 return x86::maxps(x, y);
5643         }
5644
5645         RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
5646         {
5647                 return x86::minps(x, y);
5648         }
5649
5650         RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
5651         {
5652                 #if defined(__i386__) || defined(__x86_64__)
5653                         if(exactAtPow2)
5654                         {
5655                                 // rcpps uses a piecewise-linear approximation which minimizes the relative error
5656                                 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
5657                                 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5658                         }
5659                 #endif
5660
5661                 return x86::rcpps(x);
5662         }
5663
5664         RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
5665         {
5666                 return x86::rsqrtps(x);
5667         }
5668
5669         RValue<Float4> Sqrt(RValue<Float4> x)
5670         {
5671                 return x86::sqrtps(x);
5672         }
5673
5674         RValue<Float4> Insert(RValue<Float4> x, RValue<Float> element, int i)
5675         {
5676                 return RValue<Float4>(Nucleus::createInsertElement(x.value, element.value, i));
5677         }
5678
5679         RValue<Float> Extract(RValue<Float4> x, int i)
5680         {
5681                 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
5682         }
5683
5684         RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
5685         {
5686                 return RValue<Float4>(createSwizzle4(x.value, select));
5687         }
5688
5689         RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
5690         {
5691                 int shuffle[4] =
5692                 {
5693                         ((imm >> 0) & 0x03) + 0,
5694                         ((imm >> 2) & 0x03) + 0,
5695                         ((imm >> 4) & 0x03) + 4,
5696                         ((imm >> 6) & 0x03) + 4,
5697                 };
5698
5699                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5700         }
5701
5702         RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
5703         {
5704                 int shuffle[4] = {0, 4, 1, 5};
5705                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5706         }
5707
5708         RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
5709         {
5710                 int shuffle[4] = {2, 6, 3, 7};
5711                 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
5712         }
5713
5714         RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
5715         {
5716                 Value *vector = lhs.loadValue();
5717                 Value *result = createMask4(vector, rhs.value, select);
5718                 lhs.storeValue(result);
5719
5720                 return RValue<Float4>(result);
5721         }
5722
5723         RValue<Int> SignMask(RValue<Float4> x)
5724         {
5725                 return x86::movmskps(x);
5726         }
5727
5728         RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
5729         {
5730         //      return As<Int4>(x86::cmpeqps(x, y));
5731                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
5732         }
5733
5734         RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
5735         {
5736         //      return As<Int4>(x86::cmpltps(x, y));
5737                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
5738         }
5739
5740         RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
5741         {
5742         //      return As<Int4>(x86::cmpleps(x, y));
5743                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
5744         }
5745
5746         RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
5747         {
5748         //      return As<Int4>(x86::cmpneqps(x, y));
5749                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
5750         }
5751
5752         RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
5753         {
5754         //      return As<Int4>(x86::cmpnltps(x, y));
5755                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
5756         }
5757
5758         RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
5759         {
5760         //      return As<Int4>(x86::cmpnleps(x, y));
5761                 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
5762         }
5763
5764         RValue<Float4> Round(RValue<Float4> x)
5765         {
5766                 if(CPUID::supportsSSE4_1())
5767                 {
5768                         return x86::roundps(x, 0);
5769                 }
5770                 else
5771                 {
5772                         return Float4(RoundInt(x));
5773                 }
5774         }
5775
5776         RValue<Float4> Trunc(RValue<Float4> x)
5777         {
5778                 if(CPUID::supportsSSE4_1())
5779                 {
5780                         return x86::roundps(x, 3);
5781                 }
5782                 else
5783                 {
5784                         return Float4(Int4(x));
5785                 }
5786         }
5787
5788         RValue<Float4> Frac(RValue<Float4> x)
5789         {
5790                 Float4 frc;
5791
5792                 if(CPUID::supportsSSE4_1())
5793                 {
5794                         frc = x - Floor(x);
5795                 }
5796                 else
5797                 {
5798                         frc = x - Float4(Int4(x));   // Signed fractional part.
5799
5800                         frc += As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1.0f)));   // Add 1.0 if negative.
5801                 }
5802
5803                 // x - floor(x) can be 1.0 for very small negative x.
5804                 // Clamp against the value just below 1.0.
5805                 return Min(frc, As<Float4>(Int4(0x3F7FFFFF)));
5806         }
5807
5808         RValue<Float4> Floor(RValue<Float4> x)
5809         {
5810                 if(CPUID::supportsSSE4_1())
5811                 {
5812                         return x86::floorps(x);
5813                 }
5814                 else
5815                 {
5816                         return x - Frac(x);
5817                 }
5818         }
5819
5820         RValue<Float4> Ceil(RValue<Float4> x)
5821         {
5822                 if(CPUID::supportsSSE4_1())
5823                 {
5824                         return x86::ceilps(x);
5825                 }
5826                 else
5827                 {
5828                         return -Floor(-x);
5829                 }
5830         }
5831
5832         Type *Float4::getType()
5833         {
5834                 return T(llvm::VectorType::get(T(Float::getType()), 4));
5835         }
5836
5837         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
5838         {
5839                 return lhs + RValue<Int>(Nucleus::createConstantInt(offset));
5840         }
5841
5842         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
5843         {
5844                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, false));
5845         }
5846
5847         RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
5848         {
5849                 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value, true));
5850         }
5851
5852         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
5853         {
5854                 return lhs = lhs + offset;
5855         }
5856
5857         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
5858         {
5859                 return lhs = lhs + offset;
5860         }
5861
5862         RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
5863         {
5864                 return lhs = lhs + offset;
5865         }
5866
5867         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
5868         {
5869                 return lhs + -offset;
5870         }
5871
5872         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
5873         {
5874                 return lhs + -offset;
5875         }
5876
5877         RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
5878         {
5879                 return lhs + -offset;
5880         }
5881
5882         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
5883         {
5884                 return lhs = lhs - offset;
5885         }
5886
5887         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
5888         {
5889                 return lhs = lhs - offset;
5890         }
5891
5892         RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
5893         {
5894                 return lhs = lhs - offset;
5895         }
5896
5897         void Return()
5898         {
5899                 Nucleus::createRetVoid();
5900                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
5901                 Nucleus::createUnreachable();
5902         }
5903
5904         void Return(RValue<Int> ret)
5905         {
5906                 Nucleus::createRet(ret.value);
5907                 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
5908                 Nucleus::createUnreachable();
5909         }
5910
5911         void branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
5912         {
5913                 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
5914                 Nucleus::setInsertBlock(bodyBB);
5915         }
5916
5917         RValue<Long> Ticks()
5918         {
5919                 llvm::Function *rdtsc = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::readcyclecounter);
5920
5921                 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
5922         }
5923 }
5924
5925 namespace sw
5926 {
5927         namespace x86
5928         {
5929                 RValue<Int> cvtss2si(RValue<Float> val)
5930                 {
5931                         llvm::Function *cvtss2si = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_cvtss2si);
5932
5933                         Float4 vector;
5934                         vector.x = val;
5935
5936                         return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
5937                 }
5938
5939                 RValue<Int4> cvtps2dq(RValue<Float4> val)
5940                 {
5941                         llvm::Function *cvtps2dq = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_cvtps2dq);
5942
5943                         return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
5944                 }
5945
5946                 RValue<Float> rcpss(RValue<Float> val)
5947                 {
5948                         llvm::Function *rcpss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ss);
5949
5950                         Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
5951
5952                         return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
5953                 }
5954
5955                 RValue<Float> sqrtss(RValue<Float> val)
5956                 {
5957                         llvm::Function *sqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ss);
5958
5959                         Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
5960
5961                         return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
5962                 }
5963
5964                 RValue<Float> rsqrtss(RValue<Float> val)
5965                 {
5966                         llvm::Function *rsqrtss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ss);
5967
5968                         Value *vector = Nucleus::createInsertElement(V(llvm::UndefValue::get(T(Float4::getType()))), val.value, 0);
5969
5970                         return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
5971                 }
5972
5973                 RValue<Float4> rcpps(RValue<Float4> val)
5974                 {
5975                         llvm::Function *rcpps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rcp_ps);
5976
5977                         return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
5978                 }
5979
5980                 RValue<Float4> sqrtps(RValue<Float4> val)
5981                 {
5982                         llvm::Function *sqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_sqrt_ps);
5983
5984                         return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
5985                 }
5986
5987                 RValue<Float4> rsqrtps(RValue<Float4> val)
5988                 {
5989                         llvm::Function *rsqrtps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_rsqrt_ps);
5990
5991                         return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
5992                 }
5993
5994                 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
5995                 {
5996                         llvm::Function *maxps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_max_ps);
5997
5998                         return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
5999                 }
6000
6001                 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
6002                 {
6003                         llvm::Function *minps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_min_ps);
6004
6005                         return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
6006                 }
6007
6008                 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
6009                 {
6010                         llvm::Function *roundss = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ss);
6011
6012                         Value *undef = V(llvm::UndefValue::get(T(Float4::getType())));
6013                         Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
6014
6015                         return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
6016                 }
6017
6018                 RValue<Float> floorss(RValue<Float> val)
6019                 {
6020                         return roundss(val, 1);
6021                 }
6022
6023                 RValue<Float> ceilss(RValue<Float> val)
6024                 {
6025                         return roundss(val, 2);
6026                 }
6027
6028                 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
6029                 {
6030                         llvm::Function *roundps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_round_ps);
6031
6032                         return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
6033                 }
6034
6035                 RValue<Float4> floorps(RValue<Float4> val)
6036                 {
6037                         return roundps(val, 1);
6038                 }
6039
6040                 RValue<Float4> ceilps(RValue<Float4> val)
6041                 {
6042                         return roundps(val, 2);
6043                 }
6044
6045                 RValue<Int4> pabsd(RValue<Int4> x)
6046                 {
6047                         llvm::Function *pabsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_ssse3_pabs_d_128);
6048
6049                         return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
6050                 }
6051
6052                 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
6053                 {
6054                         llvm::Function *paddsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_w);
6055
6056                         return As<Short4>(V(::builder->CreateCall2(paddsw, x.value, y.value)));
6057                 }
6058
6059                 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
6060                 {
6061                         llvm::Function *psubsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_w);
6062
6063                         return As<Short4>(V(::builder->CreateCall2(psubsw, x.value, y.value)));
6064                 }
6065
6066                 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
6067                 {
6068                         llvm::Function *paddusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_w);
6069
6070                         return As<UShort4>(V(::builder->CreateCall2(paddusw, x.value, y.value)));
6071                 }
6072
6073                 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
6074                 {
6075                         llvm::Function *psubusw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_w);
6076
6077                         return As<UShort4>(V(::builder->CreateCall2(psubusw, x.value, y.value)));
6078                 }
6079
6080                 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
6081                 {
6082                         llvm::Function *paddsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_padds_b);
6083
6084                         return As<SByte8>(V(::builder->CreateCall2(paddsb, x.value, y.value)));
6085                 }
6086
6087                 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
6088                 {
6089                         llvm::Function *psubsb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubs_b);
6090
6091                         return As<SByte8>(V(::builder->CreateCall2(psubsb, x.value, y.value)));
6092                 }
6093
6094                 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
6095                 {
6096                         llvm::Function *paddusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_paddus_b);
6097
6098                         return As<Byte8>(V(::builder->CreateCall2(paddusb, x.value, y.value)));
6099                 }
6100
6101                 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
6102                 {
6103                         llvm::Function *psubusb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psubus_b);
6104
6105                         return As<Byte8>(V(::builder->CreateCall2(psubusb, x.value, y.value)));
6106                 }
6107
6108                 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
6109                 {
6110                         llvm::Function *pavgw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pavg_w);
6111
6112                         return As<UShort4>(V(::builder->CreateCall2(pavgw, x.value, y.value)));
6113                 }
6114
6115                 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
6116                 {
6117                         llvm::Function *pmaxsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmaxs_w);
6118
6119                         return As<Short4>(V(::builder->CreateCall2(pmaxsw, x.value, y.value)));
6120                 }
6121
6122                 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
6123                 {
6124                         llvm::Function *pminsw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmins_w);
6125
6126                         return As<Short4>(V(::builder->CreateCall2(pminsw, x.value, y.value)));
6127                 }
6128
6129                 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
6130                 {
6131                         llvm::Function *pcmpgtw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_w);
6132
6133                         return As<Short4>(V(::builder->CreateCall2(pcmpgtw, x.value, y.value)));
6134                 }
6135
6136                 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
6137                 {
6138                         llvm::Function *pcmpeqw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_w);
6139
6140                         return As<Short4>(V(::builder->CreateCall2(pcmpeqw, x.value, y.value)));
6141                 }
6142
6143                 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
6144                 {
6145                         llvm::Function *pcmpgtb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpgt_b);
6146
6147                         return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, x.value, y.value)));
6148                 }
6149
6150                 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
6151                 {
6152                         llvm::Function *pcmpeqb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pcmpeq_b);
6153
6154                         return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, x.value, y.value)));
6155                 }
6156
6157                 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
6158                 {
6159                         llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
6160
6161                         return As<Short4>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
6162                 }
6163
6164                 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
6165                 {
6166                         llvm::Function *packssdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packssdw_128);
6167
6168                         return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
6169                 }
6170
6171                 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
6172                 {
6173                         llvm::Function *packsswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packsswb_128);
6174
6175                         return As<SByte8>(V(::builder->CreateCall2(packsswb, x.value, y.value)));
6176                 }
6177
6178                 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
6179                 {
6180                         llvm::Function *packuswb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_packuswb_128);
6181
6182                         return As<Byte8>(V(::builder->CreateCall2(packuswb, x.value, y.value)));
6183                 }
6184
6185                 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
6186                 {
6187                         if(CPUID::supportsSSE4_1())
6188                         {
6189                                 llvm::Function *packusdw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_packusdw);
6190
6191                                 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
6192                         }
6193                         else
6194                         {
6195                                 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
6196                                 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
6197
6198                                 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
6199                         }
6200                 }
6201
6202                 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
6203                 {
6204                         llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
6205
6206                         return As<UShort4>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
6207                 }
6208
6209                 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
6210                 {
6211                         llvm::Function *psrlw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_w);
6212
6213                         return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
6214                 }
6215
6216                 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
6217                 {
6218                         llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
6219
6220                         return As<Short4>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
6221                 }
6222
6223                 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
6224                 {
6225                         llvm::Function *psraw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_w);
6226
6227                         return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
6228                 }
6229
6230                 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
6231                 {
6232                         llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
6233
6234                         return As<Short4>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
6235                 }
6236
6237                 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
6238                 {
6239                         llvm::Function *psllw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_w);
6240
6241                         return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
6242                 }
6243
6244                 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
6245                 {
6246                         llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
6247
6248                         return As<Int2>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
6249                 }
6250
6251                 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
6252                 {
6253                         llvm::Function *pslld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pslli_d);
6254
6255                         return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
6256                 }
6257
6258                 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
6259                 {
6260                         llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
6261
6262                         return As<Int2>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
6263                 }
6264
6265                 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
6266                 {
6267                         llvm::Function *psrad = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrai_d);
6268
6269                         return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
6270                 }
6271
6272                 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
6273                 {
6274                         llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
6275
6276                         return As<UInt2>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
6277                 }
6278
6279                 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
6280                 {
6281                         llvm::Function *psrld = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_psrli_d);
6282
6283                         return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
6284                 }
6285
6286                 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
6287                 {
6288                         llvm::Function *pmaxsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxsd);
6289
6290                         return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
6291                 }
6292
6293                 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
6294                 {
6295                         llvm::Function *pminsd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminsd);
6296
6297                         return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
6298                 }
6299
6300                 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
6301                 {
6302                         llvm::Function *pmaxud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmaxud);
6303
6304                         return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
6305                 }
6306
6307                 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
6308                 {
6309                         llvm::Function *pminud = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pminud);
6310
6311                         return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
6312                 }
6313
6314                 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
6315                 {
6316                         llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
6317
6318                         return As<Short4>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
6319                 }
6320
6321                 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
6322                 {
6323                         llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
6324
6325                         return As<UShort4>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
6326                 }
6327
6328                 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
6329                 {
6330                         llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
6331
6332                         return As<Int2>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
6333                 }
6334
6335                 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
6336                 {
6337                         llvm::Function *pmulhw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulh_w);
6338
6339                         return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
6340                 }
6341
6342                 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
6343                 {
6344                         llvm::Function *pmulhuw = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmulhu_w);
6345
6346                         return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
6347                 }
6348
6349                 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
6350                 {
6351                         llvm::Function *pmaddwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmadd_wd);
6352
6353                         return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
6354                 }
6355
6356                 RValue<Int> movmskps(RValue<Float4> x)
6357                 {
6358                         llvm::Function *movmskps = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse_movmsk_ps);
6359
6360                         return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
6361                 }
6362
6363                 RValue<Int> pmovmskb(RValue<Byte8> x)
6364                 {
6365                         llvm::Function *pmovmskb = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse2_pmovmskb_128);
6366
6367                         return RValue<Int>(V(::builder->CreateCall(pmovmskb, x.value))) & 0xFF;
6368                 }
6369
6370                 RValue<Int4> pmovzxbd(RValue<Byte16> x)
6371                 {
6372                         llvm::Function *pmovzxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxbd);
6373
6374                         return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, x.value)));
6375                 }
6376
6377                 RValue<Int4> pmovsxbd(RValue<SByte16> x)
6378                 {
6379                         llvm::Function *pmovsxbd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxbd);
6380
6381                         return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, x.value)));
6382                 }
6383
6384                 RValue<Int4> pmovzxwd(RValue<UShort8> x)
6385                 {
6386                         llvm::Function *pmovzxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovzxwd);
6387
6388                         return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, x.value)));
6389                 }
6390
6391                 RValue<Int4> pmovsxwd(RValue<Short8> x)
6392                 {
6393                         llvm::Function *pmovsxwd = llvm::Intrinsic::getDeclaration(::module, llvm::Intrinsic::x86_sse41_pmovsxwd);
6394
6395                         return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, x.value)));
6396                 }
6397         }
6398 }