1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Nucleus.hpp"
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
32 #include "LLVMRoutine.hpp"
33 #include "LLVMRoutineManager.hpp"
38 #include "MutexLock.hpp"
40 #include <xmmintrin.h>
43 #if defined(__x86_64__) && defined(_WIN32)
// Stub for the X86 compilation callback symbol. It is not implemented for
// this configuration, so reaching it trips the assertion.
extern "C" void X86CompilationCallback()
{
	assert(false);   // UNIMPLEMENTED
}
// Entry points resolved at run time from the CodeAnalyst JIT notification
// library (see the GetProcAddress calls in this file). Null until resolved.
bool (*CodeAnalystInitialize)() = nullptr;
void (*CodeAnalystCompleteJITLog)() = nullptr;
bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = nullptr;
59 extern bool JITEmitDebugInfo;
64 sw::LLVMRoutineManager *routineManager = nullptr;
65 llvm::ExecutionEngine *executionEngine = nullptr;
66 llvm::IRBuilder<> *builder = nullptr;
67 llvm::LLVMContext *context = nullptr;
68 llvm::Module *module = nullptr;
69 llvm::Function *function = nullptr;
71 sw::BackoffLock codegenMutex;
78 Optimization optimization[10] = {InstructionCombining, Disabled};
80 class Type : public llvm::Type {};
81 class Value : public llvm::Value {};
82 class SwitchCases : public llvm::SwitchInst {};
83 class BasicBlock : public llvm::BasicBlock {};
85 inline Type *T(llvm::Type *t)
87 return reinterpret_cast<Type*>(t);
90 inline Value *V(llvm::Value *t)
92 return reinterpret_cast<Value*>(t);
95 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
97 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
100 inline BasicBlock *B(llvm::BasicBlock *t)
102 return reinterpret_cast<BasicBlock*>(t);
107 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
109 InitializeNativeTarget();
110 JITEmitDebugInfo = false;
114 ::context = new LLVMContext();
117 ::module = new Module("", *::context);
118 ::routineManager = new LLVMRoutineManager();
120 #if defined(__x86_64__)
121 const char *architecture = "x86-64";
123 const char *architecture = "x86";
126 SmallVector<std::string, 1> MAttrs;
127 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
128 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
129 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
130 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
131 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
132 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
133 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
136 TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
137 ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine);
141 ::builder = new IRBuilder<>(*::context);
144 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
147 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
148 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
149 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
151 CodeAnalystInitialize();
159 delete ::executionEngine;
160 ::executionEngine = nullptr;
162 ::routineManager = nullptr;
163 ::function = nullptr;
166 ::codegenMutex.unlock();
169 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
171 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
173 llvm::Type *type = ::function->getReturnType();
181 createRet(V(UndefValue::get(type)));
188 raw_fd_ostream file("llvm-dump-unopt.txt", error);
189 ::module->print(file, 0);
200 raw_fd_ostream file("llvm-dump-opt.txt", error);
201 ::module->print(file, 0);
204 void *entry = ::executionEngine->getPointerToFunction(::function);
205 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
207 if(CodeAnalystLogJITCode)
209 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
215 void Nucleus::optimize()
217 static PassManager *passManager = nullptr;
221 passManager = new PassManager();
224 // NoInfsFPMath = true;
225 // NoNaNsFPMath = true;
227 passManager->add(new TargetData(*::executionEngine->getTargetData()));
228 passManager->add(createScalarReplAggregatesPass());
230 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
232 switch(optimization[pass])
234 case Disabled: break;
235 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
236 case LICM: passManager->add(createLICMPass()); break;
237 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
238 case GVN: passManager->add(createGVNPass()); break;
239 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
240 case Reassociate: passManager->add(createReassociatePass()); break;
241 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
242 case SCCP: passManager->add(createSCCPPass()); break;
243 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
250 passManager->run(*::module);
253 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
255 // Need to allocate it in the entry block for mem2reg to work
256 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
258 Instruction *declaration;
262 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
266 declaration = new AllocaInst(type, (Value*)0);
269 entryBlock.getInstList().push_front(declaration);
271 return V(declaration);
274 BasicBlock *Nucleus::createBasicBlock()
276 return B(BasicBlock::Create(*::context, "", ::function));
279 BasicBlock *Nucleus::getInsertBlock()
281 return B(::builder->GetInsertBlock());
284 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
286 // assert(::builder->GetInsertBlock()->back().isTerminator());
287 return ::builder->SetInsertPoint(basicBlock);
290 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
292 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false);
293 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
294 ::function->setCallingConv(llvm::CallingConv::C);
296 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
299 Value *Nucleus::getArgument(unsigned int index)
301 llvm::Function::arg_iterator args = ::function->arg_begin();
312 void Nucleus::createRetVoid()
316 ::builder->CreateRetVoid();
319 void Nucleus::createRet(Value *v)
323 ::builder->CreateRet(v);
326 void Nucleus::createBr(BasicBlock *dest)
328 ::builder->CreateBr(dest);
331 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
333 ::builder->CreateCondBr(cond, ifTrue, ifFalse);
336 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
338 return V(::builder->CreateAdd(lhs, rhs));
341 Value *Nucleus::createSub(Value *lhs, Value *rhs)
343 return V(::builder->CreateSub(lhs, rhs));
346 Value *Nucleus::createMul(Value *lhs, Value *rhs)
348 return V(::builder->CreateMul(lhs, rhs));
351 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
353 return V(::builder->CreateUDiv(lhs, rhs));
356 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
358 return V(::builder->CreateSDiv(lhs, rhs));
361 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
363 return V(::builder->CreateFAdd(lhs, rhs));
366 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
368 return V(::builder->CreateFSub(lhs, rhs));
371 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
373 return V(::builder->CreateFMul(lhs, rhs));
376 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
378 return V(::builder->CreateFDiv(lhs, rhs));
381 Value *Nucleus::createURem(Value *lhs, Value *rhs)
383 return V(::builder->CreateURem(lhs, rhs));
386 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
388 return V(::builder->CreateSRem(lhs, rhs));
391 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
393 return V(::builder->CreateFRem(lhs, rhs));
396 Value *Nucleus::createShl(Value *lhs, Value *rhs)
398 return V(::builder->CreateShl(lhs, rhs));
401 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
403 return V(::builder->CreateLShr(lhs, rhs));
406 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
408 return V(::builder->CreateAShr(lhs, rhs));
411 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
413 return V(::builder->CreateAnd(lhs, rhs));
416 Value *Nucleus::createOr(Value *lhs, Value *rhs)
418 return V(::builder->CreateOr(lhs, rhs));
421 Value *Nucleus::createXor(Value *lhs, Value *rhs)
423 return V(::builder->CreateXor(lhs, rhs));
426 Value *Nucleus::createNeg(Value *v)
428 return V(::builder->CreateNeg(v));
431 Value *Nucleus::createFNeg(Value *v)
433 return V(::builder->CreateFNeg(v));
436 Value *Nucleus::createNot(Value *v)
438 return V(::builder->CreateNot(v));
441 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
443 assert(ptr->getType()->getContainedType(0) == type);
444 return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align)));
447 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
449 assert(ptr->getType()->getContainedType(0) == type);
450 ::builder->Insert(new StoreInst(value, ptr, isVolatile, align));
454 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
456 assert(ptr->getType()->getContainedType(0) == type);
457 return V(::builder->CreateGEP(ptr, index));
460 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
462 return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent));
465 Value *Nucleus::createTrunc(Value *v, Type *destType)
467 return V(::builder->CreateTrunc(v, destType));
470 Value *Nucleus::createZExt(Value *v, Type *destType)
472 return V(::builder->CreateZExt(v, destType));
475 Value *Nucleus::createSExt(Value *v, Type *destType)
477 return V(::builder->CreateSExt(v, destType));
480 Value *Nucleus::createFPToSI(Value *v, Type *destType)
482 return V(::builder->CreateFPToSI(v, destType));
485 Value *Nucleus::createSIToFP(Value *v, Type *destType)
487 return V(::builder->CreateSIToFP(v, destType));
490 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
492 return V(::builder->CreateFPTrunc(v, destType));
495 Value *Nucleus::createFPExt(Value *v, Type *destType)
497 return V(::builder->CreateFPExt(v, destType));
500 Value *Nucleus::createBitCast(Value *v, Type *destType)
502 return V(::builder->CreateBitCast(v, destType));
505 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
507 return V(::builder->CreateICmpEQ(lhs, rhs));
510 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
512 return V(::builder->CreateICmpNE(lhs, rhs));
515 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
517 return V(::builder->CreateICmpUGT(lhs, rhs));
520 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
522 return V(::builder->CreateICmpUGE(lhs, rhs));
525 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
527 return V(::builder->CreateICmpULT(lhs, rhs));
530 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
532 return V(::builder->CreateICmpULE(lhs, rhs));
535 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
537 return V(::builder->CreateICmpSGT(lhs, rhs));
540 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
542 return V(::builder->CreateICmpSGE(lhs, rhs));
545 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
547 return V(::builder->CreateICmpSLT(lhs, rhs));
550 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
552 return V(::builder->CreateICmpSLE(lhs, rhs));
555 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
557 return V(::builder->CreateFCmpOEQ(lhs, rhs));
560 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
562 return V(::builder->CreateFCmpOGT(lhs, rhs));
565 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
567 return V(::builder->CreateFCmpOGE(lhs, rhs));
570 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
572 return V(::builder->CreateFCmpOLT(lhs, rhs));
575 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
577 return V(::builder->CreateFCmpOLE(lhs, rhs));
580 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
582 return V(::builder->CreateFCmpONE(lhs, rhs));
585 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
587 return V(::builder->CreateFCmpORD(lhs, rhs));
590 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
592 return V(::builder->CreateFCmpUNO(lhs, rhs));
595 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
597 return V(::builder->CreateFCmpUEQ(lhs, rhs));
600 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
602 return V(::builder->CreateFCmpUGT(lhs, rhs));
605 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
607 return V(::builder->CreateFCmpUGE(lhs, rhs));
610 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
612 return V(::builder->CreateFCmpULT(lhs, rhs));
615 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
617 return V(::builder->CreateFCmpULE(lhs, rhs));
620 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
622 return V(::builder->CreateFCmpULE(lhs, rhs));
625 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
627 assert(vector->getType()->getContainedType(0) == type);
628 return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
631 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
633 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
636 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
638 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
639 const int maxSize = 16;
640 llvm::Constant *swizzle[maxSize];
641 assert(size <= maxSize);
643 for(int i = 0; i < size; i++)
645 swizzle[i] = llvm::ConstantInt::get(Type::getInt32Ty(*::context), select[i]);
648 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
650 return V(::builder->CreateShuffleVector(V1, V2, shuffle));
653 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
655 return V(::builder->CreateSelect(C, ifTrue, ifFalse));
658 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
660 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases));
663 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
665 switchCases->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), label, true), branch);
668 void Nucleus::createUnreachable()
670 ::builder->CreateUnreachable();
673 static Value *createSwizzle4(Value *val, unsigned char select)
677 (select >> 0) & 0x03,
678 (select >> 2) & 0x03,
679 (select >> 4) & 0x03,
680 (select >> 6) & 0x03,
683 return Nucleus::createShuffleVector(val, val, swizzle);
686 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
688 bool mask[4] = {false, false, false, false};
690 mask[(select >> 0) & 0x03] = true;
691 mask[(select >> 2) & 0x03] = true;
692 mask[(select >> 4) & 0x03] = true;
693 mask[(select >> 6) & 0x03] = true;
703 return Nucleus::createShuffleVector(lhs, rhs, swizzle);
706 Type *Nucleus::getPointerType(Type *ElementType)
708 return T(llvm::PointerType::get(ElementType, 0));
711 Value *Nucleus::createNullValue(Type *Ty)
713 return V(llvm::Constant::getNullValue(Ty));
716 Value *Nucleus::createConstantLong(int64_t i)
718 return V(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true));
721 Value *Nucleus::createConstantInt(int i)
723 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true));
726 Value *Nucleus::createConstantInt(unsigned int i)
728 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false));
731 Value *Nucleus::createConstantBool(bool b)
733 return V(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b));
736 Value *Nucleus::createConstantByte(signed char i)
738 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true));
741 Value *Nucleus::createConstantByte(unsigned char i)
743 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false));
746 Value *Nucleus::createConstantShort(short i)
748 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true));
751 Value *Nucleus::createConstantShort(unsigned short i)
753 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false));
756 Value *Nucleus::createConstantFloat(float x)
758 return V(llvm::ConstantFP::get(Float::getType(), x));
761 Value *Nucleus::createNullPointer(Type *Ty)
763 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)));
766 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
768 assert(llvm::isa<VectorType>(type));
769 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
770 assert(numConstants <= 16);
771 llvm::Constant *constantVector[16];
773 for(int i = 0; i < numConstants; i++)
775 constantVector[i] = llvm::ConstantInt::get(type->getContainedType(0), constants[i]);
778 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
781 Value *Nucleus::createConstantVector(const double *constants, Type *type)
783 assert(llvm::isa<VectorType>(type));
784 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
785 assert(numConstants <= 8);
786 llvm::Constant *constantVector[8];
788 for(int i = 0; i < numConstants; i++)
790 constantVector[i] = llvm::ConstantFP::get(type->getContainedType(0), constants[i]);
793 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
796 Type *Void::getType()
798 return T(llvm::Type::getVoidTy(*::context));
801 class MMX : public LValue<MMX>
804 static Type *getType();
809 return T(llvm::Type::getX86_MMXTy(*::context));
812 Bool::Bool(Argument<Bool> argument)
814 storeValue(argument.value);
819 storeValue(Nucleus::createConstantBool(x));
822 Bool::Bool(RValue<Bool> rhs)
824 storeValue(rhs.value);
827 Bool::Bool(const Bool &rhs)
829 Value *value = rhs.loadValue();
833 Bool::Bool(const Reference<Bool> &rhs)
835 Value *value = rhs.loadValue();
839 RValue<Bool> Bool::operator=(RValue<Bool> rhs)
841 storeValue(rhs.value);
846 RValue<Bool> Bool::operator=(const Bool &rhs)
848 Value *value = rhs.loadValue();
851 return RValue<Bool>(value);
854 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
856 Value *value = rhs.loadValue();
859 return RValue<Bool>(value);
862 RValue<Bool> operator!(RValue<Bool> val)
864 return RValue<Bool>(Nucleus::createNot(val.value));
867 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
869 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
872 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
874 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
877 Type *Bool::getType()
879 return T(llvm::Type::getInt1Ty(*::context));
882 Byte::Byte(Argument<Byte> argument)
884 storeValue(argument.value);
887 Byte::Byte(RValue<Int> cast)
889 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
894 Byte::Byte(RValue<UInt> cast)
896 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
901 Byte::Byte(RValue<UShort> cast)
903 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
910 storeValue(Nucleus::createConstantByte((unsigned char)x));
913 Byte::Byte(unsigned char x)
915 storeValue(Nucleus::createConstantByte(x));
918 Byte::Byte(RValue<Byte> rhs)
920 storeValue(rhs.value);
923 Byte::Byte(const Byte &rhs)
925 Value *value = rhs.loadValue();
929 Byte::Byte(const Reference<Byte> &rhs)
931 Value *value = rhs.loadValue();
935 RValue<Byte> Byte::operator=(RValue<Byte> rhs)
937 storeValue(rhs.value);
942 RValue<Byte> Byte::operator=(const Byte &rhs)
944 Value *value = rhs.loadValue();
947 return RValue<Byte>(value);
950 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
952 Value *value = rhs.loadValue();
955 return RValue<Byte>(value);
958 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
960 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
963 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
965 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
968 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
970 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
973 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
975 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
978 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
980 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
983 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
985 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
988 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
990 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
993 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
995 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
998 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1000 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1003 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1005 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1008 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1010 return lhs = lhs + rhs;
1013 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1015 return lhs = lhs - rhs;
1018 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1020 return lhs = lhs * rhs;
1023 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1025 return lhs = lhs / rhs;
1028 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1030 return lhs = lhs % rhs;
1033 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1035 return lhs = lhs & rhs;
1038 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1040 return lhs = lhs | rhs;
1043 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1045 return lhs = lhs ^ rhs;
1048 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1050 return lhs = lhs << rhs;
1053 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1055 return lhs = lhs >> rhs;
1058 RValue<Byte> operator+(RValue<Byte> val)
1063 RValue<Byte> operator-(RValue<Byte> val)
1065 return RValue<Byte>(Nucleus::createNeg(val.value));
1068 RValue<Byte> operator~(RValue<Byte> val)
1070 return RValue<Byte>(Nucleus::createNot(val.value));
1073 RValue<Byte> operator++(Byte &val, int) // Post-increment
1075 RValue<Byte> res = val;
1077 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1078 val.storeValue(inc);
1083 const Byte &operator++(Byte &val) // Pre-increment
1085 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1086 val.storeValue(inc);
1091 RValue<Byte> operator--(Byte &val, int) // Post-decrement
1093 RValue<Byte> res = val;
1095 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1096 val.storeValue(inc);
1101 const Byte &operator--(Byte &val) // Pre-decrement
1103 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1104 val.storeValue(inc);
1109 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1111 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1114 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1116 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1119 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1121 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1124 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1126 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1129 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1131 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1134 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1136 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1139 Type *Byte::getType()
1141 return T(llvm::Type::getInt8Ty(*::context));
1144 SByte::SByte(Argument<SByte> argument)
1146 storeValue(argument.value);
1149 SByte::SByte(RValue<Int> cast)
1151 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1153 storeValue(integer);
1156 SByte::SByte(RValue<Short> cast)
1158 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1160 storeValue(integer);
1163 SByte::SByte(signed char x)
1165 storeValue(Nucleus::createConstantByte(x));
1168 SByte::SByte(RValue<SByte> rhs)
1170 storeValue(rhs.value);
1173 SByte::SByte(const SByte &rhs)
1175 Value *value = rhs.loadValue();
1179 SByte::SByte(const Reference<SByte> &rhs)
1181 Value *value = rhs.loadValue();
1185 RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1187 storeValue(rhs.value);
1192 RValue<SByte> SByte::operator=(const SByte &rhs)
1194 Value *value = rhs.loadValue();
1197 return RValue<SByte>(value);
1200 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1202 Value *value = rhs.loadValue();
1205 return RValue<SByte>(value);
1208 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1210 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1213 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1215 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1218 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1220 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1223 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1225 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1228 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1230 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1233 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1235 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1238 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1240 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1243 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1245 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1248 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1250 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1253 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1255 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1258 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1260 return lhs = lhs + rhs;
1263 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1265 return lhs = lhs - rhs;
1268 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1270 return lhs = lhs * rhs;
1273 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1275 return lhs = lhs / rhs;
1278 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1280 return lhs = lhs % rhs;
1283 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1285 return lhs = lhs & rhs;
1288 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1290 return lhs = lhs | rhs;
1293 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1295 return lhs = lhs ^ rhs;
1298 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1300 return lhs = lhs << rhs;
1303 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1305 return lhs = lhs >> rhs;
1308 RValue<SByte> operator+(RValue<SByte> val)
1313 RValue<SByte> operator-(RValue<SByte> val)
1315 return RValue<SByte>(Nucleus::createNeg(val.value));
1318 RValue<SByte> operator~(RValue<SByte> val)
1320 return RValue<SByte>(Nucleus::createNot(val.value));
1323 RValue<SByte> operator++(SByte &val, int) // Post-increment
1325 RValue<SByte> res = val;
1327 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1328 val.storeValue(inc);
1333 const SByte &operator++(SByte &val) // Pre-increment
1335 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1336 val.storeValue(inc);
1341 RValue<SByte> operator--(SByte &val, int) // Post-decrement
1343 RValue<SByte> res = val;
1345 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1346 val.storeValue(inc);
1351 const SByte &operator--(SByte &val) // Pre-decrement
1353 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1354 val.storeValue(inc);
1359 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1361 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1364 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1366 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1369 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1371 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1374 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1376 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1379 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1381 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1384 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1386 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1389 Type *SByte::getType()
1391 return T(llvm::Type::getInt8Ty(*::context));
1394 Short::Short(Argument<Short> argument)
1396 storeValue(argument.value);
1399 Short::Short(RValue<Int> cast)
1401 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1403 storeValue(integer);
1406 Short::Short(short x)
1408 storeValue(Nucleus::createConstantShort(x));
1411 Short::Short(RValue<Short> rhs)
1413 storeValue(rhs.value);
1416 Short::Short(const Short &rhs)
1418 Value *value = rhs.loadValue();
1422 Short::Short(const Reference<Short> &rhs)
1424 Value *value = rhs.loadValue();
1428 RValue<Short> Short::operator=(RValue<Short> rhs)
1430 storeValue(rhs.value);
1435 RValue<Short> Short::operator=(const Short &rhs)
1437 Value *value = rhs.loadValue();
1440 return RValue<Short>(value);
1443 RValue<Short> Short::operator=(const Reference<Short> &rhs)
1445 Value *value = rhs.loadValue();
1448 return RValue<Short>(value);
1451 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1453 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1456 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1458 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1461 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1463 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1466 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1468 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1471 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1473 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1476 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1478 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1481 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1483 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1486 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1488 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1491 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1493 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1496 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1498 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1501 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1503 return lhs = lhs + rhs;
1506 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
1508 return lhs = lhs - rhs;
1511 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
1513 return lhs = lhs * rhs;
1516 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
1518 return lhs = lhs / rhs;
1521 RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
1523 return lhs = lhs % rhs;
1526 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
1528 return lhs = lhs & rhs;
1531 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
1533 return lhs = lhs | rhs;
1536 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
1538 return lhs = lhs ^ rhs;
1541 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
1543 return lhs = lhs << rhs;
1546 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
1548 return lhs = lhs >> rhs;
1551 RValue<Short> operator+(RValue<Short> val)
1556 RValue<Short> operator-(RValue<Short> val)
1558 return RValue<Short>(Nucleus::createNeg(val.value));
1561 RValue<Short> operator~(RValue<Short> val)
1563 return RValue<Short>(Nucleus::createNot(val.value));
1566 RValue<Short> operator++(Short &val, int) // Post-increment
1568 RValue<Short> res = val;
1570 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1571 val.storeValue(inc);
1576 const Short &operator++(Short &val) // Pre-increment
1578 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1579 val.storeValue(inc);
1584 RValue<Short> operator--(Short &val, int) // Post-decrement
1586 RValue<Short> res = val;
1588 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1589 val.storeValue(inc);
1594 const Short &operator--(Short &val) // Pre-decrement
1596 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1597 val.storeValue(inc);
1602 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1604 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1607 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1609 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1612 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1614 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1617 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1619 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1622 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1624 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1627 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1629 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1632 Type *Short::getType()
1634 return T(llvm::Type::getInt16Ty(*::context));
1637 UShort::UShort(Argument<UShort> argument)
1639 storeValue(argument.value);
1642 UShort::UShort(RValue<UInt> cast)
1644 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1646 storeValue(integer);
1649 UShort::UShort(RValue<Int> cast)
1651 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1653 storeValue(integer);
1656 UShort::UShort(unsigned short x)
1658 storeValue(Nucleus::createConstantShort(x));
1661 UShort::UShort(RValue<UShort> rhs)
1663 storeValue(rhs.value);
1666 UShort::UShort(const UShort &rhs)
1668 Value *value = rhs.loadValue();
1672 UShort::UShort(const Reference<UShort> &rhs)
1674 Value *value = rhs.loadValue();
1678 RValue<UShort> UShort::operator=(RValue<UShort> rhs)
1680 storeValue(rhs.value);
1685 RValue<UShort> UShort::operator=(const UShort &rhs)
1687 Value *value = rhs.loadValue();
1690 return RValue<UShort>(value);
1693 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
1695 Value *value = rhs.loadValue();
1698 return RValue<UShort>(value);
1701 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1703 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1706 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1708 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1711 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1713 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1716 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1718 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1721 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1723 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1726 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1728 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1731 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1733 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1736 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1738 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1741 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1743 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1746 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1748 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1751 RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
1753 return lhs = lhs + rhs;
1756 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
1758 return lhs = lhs - rhs;
1761 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
1763 return lhs = lhs * rhs;
1766 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
1768 return lhs = lhs / rhs;
1771 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
1773 return lhs = lhs % rhs;
1776 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
1778 return lhs = lhs & rhs;
1781 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
1783 return lhs = lhs | rhs;
1786 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
1788 return lhs = lhs ^ rhs;
1791 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
1793 return lhs = lhs << rhs;
1796 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
1798 return lhs = lhs >> rhs;
1801 RValue<UShort> operator+(RValue<UShort> val)
1806 RValue<UShort> operator-(RValue<UShort> val)
1808 return RValue<UShort>(Nucleus::createNeg(val.value));
1811 RValue<UShort> operator~(RValue<UShort> val)
1813 return RValue<UShort>(Nucleus::createNot(val.value));
1816 RValue<UShort> operator++(UShort &val, int) // Post-increment
1818 RValue<UShort> res = val;
1820 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1821 val.storeValue(inc);
1826 const UShort &operator++(UShort &val) // Pre-increment
1828 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1829 val.storeValue(inc);
1834 RValue<UShort> operator--(UShort &val, int) // Post-decrement
1836 RValue<UShort> res = val;
1838 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1839 val.storeValue(inc);
1844 const UShort &operator--(UShort &val) // Pre-decrement
1846 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1847 val.storeValue(inc);
1852 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1854 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1857 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1859 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1862 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1864 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1867 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1869 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1872 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1874 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1877 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1879 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1882 Type *UShort::getType()
1884 return T(llvm::Type::getInt16Ty(*::context));
1887 Byte4::Byte4(RValue<Byte8> cast)
1889 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType()));
1892 Byte4::Byte4(const Reference<Byte4> &rhs)
1894 Value *value = rhs.loadValue();
1898 Type *Byte4::getType()
1901 return T(VectorType::get(Byte::getType(), 4));
1903 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1907 Type *SByte4::getType()
1910 return T(VectorType::get(SByte::getType(), 4));
1912 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1916 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
1918 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
1919 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Byte::getType(), 8))));
1921 storeValue(Nucleus::createBitCast(vector, getType()));
1924 Byte8::Byte8(RValue<Byte8> rhs)
1926 storeValue(rhs.value);
1929 Byte8::Byte8(const Byte8 &rhs)
1931 Value *value = rhs.loadValue();
1935 Byte8::Byte8(const Reference<Byte8> &rhs)
1937 Value *value = rhs.loadValue();
1941 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
1943 storeValue(rhs.value);
1948 RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
1950 Value *value = rhs.loadValue();
1953 return RValue<Byte8>(value);
1956 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
1958 Value *value = rhs.loadValue();
1961 return RValue<Byte8>(value);
1964 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
1966 if(CPUID::supportsMMX2())
1968 return x86::paddb(lhs, rhs);
1972 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
1976 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
1978 if(CPUID::supportsMMX2())
1980 return x86::psubb(lhs, rhs);
1984 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
1988 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
1990 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
1993 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
1995 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
1998 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2000 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2003 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2005 if(CPUID::supportsMMX2())
2007 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2011 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2015 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2017 if(CPUID::supportsMMX2())
2019 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2023 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2027 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2029 if(CPUID::supportsMMX2())
2031 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2035 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2039 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2041 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2044 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2046 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2049 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2051 return lhs = lhs + rhs;
2054 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2056 return lhs = lhs - rhs;
2059 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2061 // return lhs = lhs * rhs;
2064 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2066 // return lhs = lhs / rhs;
2069 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2071 // return lhs = lhs % rhs;
2074 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2076 return lhs = lhs & rhs;
2079 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2081 return lhs = lhs | rhs;
2084 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2086 return lhs = lhs ^ rhs;
2089 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2091 // return lhs = lhs << rhs;
2094 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2096 // return lhs = lhs >> rhs;
2099 // RValue<Byte8> operator+(RValue<Byte8> val)
2104 // RValue<Byte8> operator-(RValue<Byte8> val)
2106 // return RValue<Byte8>(Nucleus::createNeg(val.value));
2109 RValue<Byte8> operator~(RValue<Byte8> val)
2111 if(CPUID::supportsMMX2())
2113 return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2117 return RValue<Byte8>(Nucleus::createNot(val.value));
2121 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2123 return x86::paddusb(x, y);
2126 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2128 return x86::psubusb(x, y);
2131 RValue<Short4> Unpack(RValue<Byte4> x)
2133 Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
2134 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2136 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
2139 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2141 if(CPUID::supportsMMX2())
2143 return x86::punpcklbw(x, y);
2147 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2148 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2150 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2154 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2156 if(CPUID::supportsMMX2())
2158 return x86::punpckhbw(x, y);
2162 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2163 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2165 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2169 RValue<Int> SignMask(RValue<Byte8> x)
2171 return x86::pmovmskb(x);
2174 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2176 // return x86::pcmpgtb(x, y); // FIXME: Signedness
2179 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2181 return x86::pcmpeqb(x, y);
2184 Type *Byte8::getType()
2186 if(CPUID::supportsMMX2())
2188 return MMX::getType();
2192 return T(VectorType::get(Byte::getType(), 8));
2196 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2198 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2199 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(SByte::getType(), 8))));
2201 storeValue(Nucleus::createBitCast(vector, getType()));
2204 SByte8::SByte8(RValue<SByte8> rhs)
2206 storeValue(rhs.value);
2209 SByte8::SByte8(const SByte8 &rhs)
2211 Value *value = rhs.loadValue();
2215 SByte8::SByte8(const Reference<SByte8> &rhs)
2217 Value *value = rhs.loadValue();
2221 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2223 storeValue(rhs.value);
2228 RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2230 Value *value = rhs.loadValue();
2233 return RValue<SByte8>(value);
2236 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2238 Value *value = rhs.loadValue();
2241 return RValue<SByte8>(value);
2244 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2246 if(CPUID::supportsMMX2())
2248 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2252 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2256 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2258 if(CPUID::supportsMMX2())
2260 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2264 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2268 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2270 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2273 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2275 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2278 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2280 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2283 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2285 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2288 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2290 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2293 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2295 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2298 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2300 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2303 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2305 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2308 RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2310 return lhs = lhs + rhs;
2313 RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2315 return lhs = lhs - rhs;
2318 // RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2320 // return lhs = lhs * rhs;
2323 // RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2325 // return lhs = lhs / rhs;
2328 // RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2330 // return lhs = lhs % rhs;
2333 RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2335 return lhs = lhs & rhs;
2338 RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2340 return lhs = lhs | rhs;
2343 RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2345 return lhs = lhs ^ rhs;
2348 // RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2350 // return lhs = lhs << rhs;
2353 // RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2355 // return lhs = lhs >> rhs;
2358 // RValue<SByte8> operator+(RValue<SByte8> val)
2363 // RValue<SByte8> operator-(RValue<SByte8> val)
2365 // return RValue<SByte8>(Nucleus::createNeg(val.value));
2368 RValue<SByte8> operator~(RValue<SByte8> val)
2370 if(CPUID::supportsMMX2())
2372 return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2376 return RValue<SByte8>(Nucleus::createNot(val.value));
2380 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2382 return x86::paddsb(x, y);
2385 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2387 return x86::psubsb(x, y);
2390 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2392 if(CPUID::supportsMMX2())
2394 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2398 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2399 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2401 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2405 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2407 if(CPUID::supportsMMX2())
2409 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2413 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2414 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2416 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2420 RValue<Int> SignMask(RValue<SByte8> x)
2422 return x86::pmovmskb(As<Byte8>(x));
2425 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2427 return x86::pcmpgtb(x, y);
2430 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2432 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2435 Type *SByte8::getType()
2437 if(CPUID::supportsMMX2())
2439 return MMX::getType();
2443 return T(VectorType::get(SByte::getType(), 8));
2447 Byte16::Byte16(RValue<Byte16> rhs)
2449 storeValue(rhs.value);
2452 Byte16::Byte16(const Byte16 &rhs)
2454 Value *value = rhs.loadValue();
2458 Byte16::Byte16(const Reference<Byte16> &rhs)
2460 Value *value = rhs.loadValue();
2464 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2466 storeValue(rhs.value);
2471 RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2473 Value *value = rhs.loadValue();
2476 return RValue<Byte16>(value);
2479 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2481 Value *value = rhs.loadValue();
2484 return RValue<Byte16>(value);
2487 Type *Byte16::getType()
2489 return T(VectorType::get(Byte::getType(), 16));
2492 Type *SByte16::getType()
2494 return T( VectorType::get(SByte::getType(), 16));
2497 Short2::Short2(RValue<Short4> cast)
2499 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
2502 Type *Short2::getType()
2505 return T(VectorType::get(Short::getType(), 2));
2507 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
2511 UShort2::UShort2(RValue<UShort4> cast)
2513 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
2516 Type *UShort2::getType()
2519 return T(VectorType::get(UShort::getType(), 2));
2521 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
2525 Short4::Short4(RValue<Int> cast)
2527 Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2528 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2530 storeValue(swizzle);
2533 Short4::Short4(RValue<Int4> cast)
2535 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2537 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
2539 pack[0] = Nucleus::createConstantInt(0);
2540 pack[1] = Nucleus::createConstantInt(2);
2541 pack[2] = Nucleus::createConstantInt(4);
2542 pack[3] = Nucleus::createConstantInt(6);
2544 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
2548 // FIXME: Use Swizzle<Short8>
2549 if(!CPUID::supportsSSSE3())
2551 int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
2552 int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
2554 Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
2555 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
2556 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
2557 packed = createSwizzle4(int4, 0x88);
2561 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
2562 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2563 packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
2566 #if 0 // FIXME: No optimal instruction selection
2567 Value *qword2 = Nucleus::createBitCast(packed, T(VectorType::get(Long::getType(), 2)));
2568 Value *element = Nucleus::createExtractElement(qword2, 0);
2569 Value *short4 = Nucleus::createBitCast(element, Short4::getType());
2570 #else // FIXME: Requires SSE
2571 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2572 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2579 // Short4::Short4(RValue<Float> cast)
2583 Short4::Short4(RValue<Float4> cast)
2585 Int4 v4i32 = Int4(cast);
2586 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2588 storeValue(As<Short4>(Int2(v4i32)).value);
2591 Short4::Short4(short xyzw)
2593 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2594 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2596 storeValue(Nucleus::createBitCast(vector, getType()));
2599 Short4::Short4(short x, short y, short z, short w)
2601 int64_t constantVector[4] = {x, y, z, w};
2602 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2604 storeValue(Nucleus::createBitCast(vector, getType()));
2607 Short4::Short4(RValue<Short4> rhs)
2609 storeValue(rhs.value);
2612 Short4::Short4(const Short4 &rhs)
2614 Value *value = rhs.loadValue();
2618 Short4::Short4(const Reference<Short4> &rhs)
2620 Value *value = rhs.loadValue();
2624 Short4::Short4(RValue<UShort4> rhs)
2626 storeValue(rhs.value);
2629 Short4::Short4(const UShort4 &rhs)
2631 storeValue(rhs.loadValue());
2634 Short4::Short4(const Reference<UShort4> &rhs)
2636 storeValue(rhs.loadValue());
2639 RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2641 storeValue(rhs.value);
2646 RValue<Short4> Short4::operator=(const Short4 &rhs)
2648 Value *value = rhs.loadValue();
2651 return RValue<Short4>(value);
2654 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
2656 Value *value = rhs.loadValue();
2659 return RValue<Short4>(value);
2662 RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
2664 storeValue(rhs.value);
2666 return RValue<Short4>(rhs);
2669 RValue<Short4> Short4::operator=(const UShort4 &rhs)
2671 Value *value = rhs.loadValue();
2674 return RValue<Short4>(value);
2677 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
2679 Value *value = rhs.loadValue();
2682 return RValue<Short4>(value);
2685 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2687 if(CPUID::supportsMMX2())
2689 return x86::paddw(lhs, rhs);
2693 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2697 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2699 if(CPUID::supportsMMX2())
2701 return x86::psubw(lhs, rhs);
2705 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2709 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2711 if(CPUID::supportsMMX2())
2713 return x86::pmullw(lhs, rhs);
2717 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2721 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2723 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2726 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2728 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2731 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2733 if(CPUID::supportsMMX2())
2735 return x86::pand(lhs, rhs);
2739 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2743 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2745 if(CPUID::supportsMMX2())
2747 return x86::por(lhs, rhs);
2751 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2755 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2757 if(CPUID::supportsMMX2())
2759 return x86::pxor(lhs, rhs);
2763 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2767 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2769 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2771 return x86::psllw(lhs, rhs);
2774 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2776 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2778 return x86::psraw(lhs, rhs);
2781 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
2783 return lhs = lhs + rhs;
2786 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
2788 return lhs = lhs - rhs;
2791 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
2793 return lhs = lhs * rhs;
2796 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
2798 // return lhs = lhs / rhs;
2801 // RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
2803 // return lhs = lhs % rhs;
2806 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
2808 return lhs = lhs & rhs;
2811 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
2813 return lhs = lhs | rhs;
2816 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
2818 return lhs = lhs ^ rhs;
2821 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
2823 return lhs = lhs << rhs;
2826 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
2828 return lhs = lhs >> rhs;
2831 // RValue<Short4> operator+(RValue<Short4> val)
2836 RValue<Short4> operator-(RValue<Short4> val)
2838 if(CPUID::supportsMMX2())
2840 return Short4(0, 0, 0, 0) - val;
2844 return RValue<Short4>(Nucleus::createNeg(val.value));
2848 RValue<Short4> operator~(RValue<Short4> val)
2850 if(CPUID::supportsMMX2())
2852 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
2856 return RValue<Short4>(Nucleus::createNot(val.value));
2860 RValue<Short4> RoundShort4(RValue<Float4> cast)
2862 RValue<Int4> v4i32 = x86::cvtps2dq(cast);
2863 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
2865 return As<Short4>(Int2(As<Int4>(v8i16)));
2868 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2870 return x86::pmaxsw(x, y);
2873 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2875 return x86::pminsw(x, y);
2878 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2880 return x86::paddsw(x, y);
2883 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2885 return x86::psubsw(x, y);
2888 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2890 return x86::pmulhw(x, y);
2893 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2895 return x86::pmaddwd(x, y);
2898 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
2900 return x86::packsswb(x, y);
2903 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
2905 if(CPUID::supportsMMX2())
2907 return x86::punpcklwd(x, y);
2911 int shuffle[4] = {0, 4, 1, 5};
2912 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2914 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
2918 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
2920 if(CPUID::supportsMMX2())
2922 return x86::punpckhwd(x, y);
2926 int shuffle[4] = {2, 6, 3, 7};
2927 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2929 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
2933 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
2935 if(CPUID::supportsMMX2())
2937 return x86::pshufw(x, select);
2941 return RValue<Short4>(createSwizzle4(x.value, select));
2945 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
2947 if(CPUID::supportsMMX2())
2949 return x86::pinsrw(val, Int(element), i);
2953 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
2957 RValue<Short> Extract(RValue<Short4> val, int i)
2959 if(CPUID::supportsMMX2())
2961 return Short(x86::pextrw(val, i));
2965 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
2969 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
2971 return x86::pcmpgtw(x, y);
2974 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
2976 return x86::pcmpeqw(x, y);
2979 Type *Short4::getType()
2981 if(CPUID::supportsMMX2())
2983 return MMX::getType();
2987 return T(VectorType::get(Short::getType(), 4));
2991 UShort4::UShort4(RValue<Int4> cast)
2993 *this = Short4(cast);
2996 UShort4::UShort4(RValue<Float4> cast, bool saturate)
3002 if(CPUID::supportsSSE4_1())
3004 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
3008 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
3018 if(!saturate || !CPUID::supportsSSE4_1())
3020 *this = Short4(int4);
3024 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(int4, int4))));
3028 UShort4::UShort4(unsigned short xyzw)
3030 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3031 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
3033 storeValue(Nucleus::createBitCast(vector, getType()));
3036 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3038 int64_t constantVector[4] = {x, y, z, w};
3039 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
3041 storeValue(Nucleus::createBitCast(vector, getType()));
3044 UShort4::UShort4(RValue<UShort4> rhs)
3046 storeValue(rhs.value);
3049 UShort4::UShort4(const UShort4 &rhs)
3051 Value *value = rhs.loadValue();
3055 UShort4::UShort4(const Reference<UShort4> &rhs)
3057 Value *value = rhs.loadValue();
3061 UShort4::UShort4(RValue<Short4> rhs)
3063 storeValue(rhs.value);
3066 UShort4::UShort4(const Short4 &rhs)
3068 Value *value = rhs.loadValue();
3072 UShort4::UShort4(const Reference<Short4> &rhs)
3074 Value *value = rhs.loadValue();
3078 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3080 storeValue(rhs.value);
3085 RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3087 Value *value = rhs.loadValue();
3090 return RValue<UShort4>(value);
3093 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3095 Value *value = rhs.loadValue();
3098 return RValue<UShort4>(value);
3101 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3103 storeValue(rhs.value);
3105 return RValue<UShort4>(rhs);
3108 RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3110 Value *value = rhs.loadValue();
3113 return RValue<UShort4>(value);
3116 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3118 Value *value = rhs.loadValue();
3121 return RValue<UShort4>(value);
3124 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3126 if(CPUID::supportsMMX2())
3128 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3132 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3136 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3138 if(CPUID::supportsMMX2())
3140 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3144 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3148 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3150 if(CPUID::supportsMMX2())
3152 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3156 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3160 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3162 if(CPUID::supportsMMX2())
3164 return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
3168 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
3172 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3174 if(CPUID::supportsMMX2())
3176 return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
3180 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
3184 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3186 if(CPUID::supportsMMX2())
3188 return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
3192 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
3196 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3198 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3200 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3203 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3205 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3207 return x86::psrlw(lhs, rhs);
3210 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3212 return lhs = lhs << rhs;
3215 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3217 return lhs = lhs >> rhs;
3220 RValue<UShort4> operator~(RValue<UShort4> val)
3222 if(CPUID::supportsMMX2())
3224 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3228 return RValue<UShort4>(Nucleus::createNot(val.value));
3232 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3234 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3237 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3239 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3242 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3244 return x86::paddusw(x, y);
3247 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3249 return x86::psubusw(x, y);
3252 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3254 return x86::pmulhuw(x, y);
3257 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3259 return x86::pavgw(x, y);
3262 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3264 return x86::packuswb(x, y);
3267 Type *UShort4::getType()
3269 if(CPUID::supportsMMX2())
3271 return MMX::getType();
3275 return T(VectorType::get(UShort::getType(), 4));
3279 Short8::Short8(short c)
3281 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3282 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3285 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3287 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3288 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3291 Short8::Short8(RValue<Short8> rhs)
3293 storeValue(rhs.value);
3296 Short8::Short8(const Reference<Short8> &rhs)
3298 Value *value = rhs.loadValue();
3302 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3304 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3305 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3307 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3308 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3309 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3310 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3315 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3317 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3320 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3322 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3325 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3327 return x86::psllw(lhs, rhs); // FIXME: Fallback required
3330 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3332 return x86::psraw(lhs, rhs); // FIXME: Fallback required
3335 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3337 return x86::pmaddwd(x, y); // FIXME: Fallback required
3340 RValue<Int4> Abs(RValue<Int4> x)
3342 if(CPUID::supportsSSSE3())
3344 return x86::pabsd(x);
3348 Int4 mask = (x >> 31);
3349 return (mask ^ x) - mask;
3353 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3355 return x86::pmulhw(x, y); // FIXME: Fallback required
3358 Type *Short8::getType()
3360 return T(VectorType::get(Short::getType(), 8));
3363 UShort8::UShort8(unsigned short c)
3365 int64_t constantVector[8] = {c, c, c, c, c, c, c, c};
3366 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3369 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3371 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3372 storeValue(Nucleus::createConstantVector(constantVector, getType()));
3375 UShort8::UShort8(RValue<UShort8> rhs)
3377 storeValue(rhs.value);
3380 UShort8::UShort8(const Reference<UShort8> &rhs)
3382 Value *value = rhs.loadValue();
3386 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3388 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3389 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3391 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3392 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3393 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3394 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3399 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3401 storeValue(rhs.value);
3406 RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3408 Value *value = rhs.loadValue();
3411 return RValue<UShort8>(value);
3414 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3416 Value *value = rhs.loadValue();
3419 return RValue<UShort8>(value);
3422 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3424 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3427 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3429 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required
3432 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3434 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
3437 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3439 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3442 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3444 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3447 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3449 return lhs = lhs + rhs;
3452 RValue<UShort8> operator~(RValue<UShort8> val)
3454 return RValue<UShort8>(Nucleus::createNot(val.value));
// Rearranges x with a byte-level shuffle: x is viewed as Byte16, shuffled with
// itself using the `pshufb` index table, and viewed as UShort8 again.
// NOTE(review): the statements that build `pshufb` from select0..select7 are
// not visible in this excerpt — confirm the index layout against the full file.
3457 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
// Reinterpret the eight 16-bit lanes as sixteen bytes.
3479 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
// Both shuffle inputs are the same vector.
3480 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
// Reinterpret the shuffled bytes as eight 16-bit lanes.
3481 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3483 return RValue<UShort8>(short8);
3486 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3488 return x86::pmulhuw(x, y); // FIXME: Fallback required
3491 Type *UShort8::getType()
3493 return T(VectorType::get(UShort::getType(), 8));
3496 Int::Int(Argument<Int> argument)
3498 storeValue(argument.value);
3501 Int::Int(RValue<Byte> cast)
3503 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3505 storeValue(integer);
3508 Int::Int(RValue<SByte> cast)
3510 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3512 storeValue(integer);
3515 Int::Int(RValue<Short> cast)
3517 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3519 storeValue(integer);
3522 Int::Int(RValue<UShort> cast)
3524 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3526 storeValue(integer);
3529 Int::Int(RValue<Int2> cast)
3531 *this = Extract(cast, 0);
3534 Int::Int(RValue<Long> cast)
3536 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3538 storeValue(integer);
3541 Int::Int(RValue<Float> cast)
3543 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3545 storeValue(integer);
3550 storeValue(Nucleus::createConstantInt(x));
3553 Int::Int(RValue<Int> rhs)
3555 storeValue(rhs.value);
3558 Int::Int(RValue<UInt> rhs)
3560 storeValue(rhs.value);
3563 Int::Int(const Int &rhs)
3565 Value *value = rhs.loadValue();
3569 Int::Int(const Reference<Int> &rhs)
3571 Value *value = rhs.loadValue();
3575 Int::Int(const UInt &rhs)
3577 Value *value = rhs.loadValue();
3581 Int::Int(const Reference<UInt> &rhs)
3583 Value *value = rhs.loadValue();
3587 RValue<Int> Int::operator=(int rhs)
3589 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
3592 RValue<Int> Int::operator=(RValue<Int> rhs)
3594 storeValue(rhs.value);
3599 RValue<Int> Int::operator=(RValue<UInt> rhs)
3601 storeValue(rhs.value);
3603 return RValue<Int>(rhs);
3606 RValue<Int> Int::operator=(const Int &rhs)
3608 Value *value = rhs.loadValue();
3611 return RValue<Int>(value);
3614 RValue<Int> Int::operator=(const Reference<Int> &rhs)
3616 Value *value = rhs.loadValue();
3619 return RValue<Int>(value);
3622 RValue<Int> Int::operator=(const UInt &rhs)
3624 Value *value = rhs.loadValue();
3627 return RValue<Int>(value);
3630 RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3632 Value *value = rhs.loadValue();
3635 return RValue<Int>(value);
3638 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3640 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3643 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3645 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3648 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3650 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3653 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3655 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3658 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3660 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3663 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3665 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3668 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3670 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3673 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3675 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3678 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3680 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3683 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3685 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3688 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3690 return lhs = lhs + rhs;
3693 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3695 return lhs = lhs - rhs;
3698 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3700 return lhs = lhs * rhs;
3703 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3705 return lhs = lhs / rhs;
3708 RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3710 return lhs = lhs % rhs;
3713 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3715 return lhs = lhs & rhs;
3718 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3720 return lhs = lhs | rhs;
3723 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3725 return lhs = lhs ^ rhs;
3728 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3730 return lhs = lhs << rhs;
3733 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3735 return lhs = lhs >> rhs;
3738 RValue<Int> operator+(RValue<Int> val)
3743 RValue<Int> operator-(RValue<Int> val)
3745 return RValue<Int>(Nucleus::createNeg(val.value));
3748 RValue<Int> operator~(RValue<Int> val)
3750 return RValue<Int>(Nucleus::createNot(val.value));
3753 RValue<Int> operator++(Int &val, int) // Post-increment
3755 RValue<Int> res = val;
3757 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3758 val.storeValue(inc);
3763 const Int &operator++(Int &val) // Pre-increment
3765 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3766 val.storeValue(inc);
3771 RValue<Int> operator--(Int &val, int) // Post-decrement
3773 RValue<Int> res = val;
3775 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3776 val.storeValue(inc);
3781 const Int &operator--(Int &val) // Pre-decrement
3783 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3784 val.storeValue(inc);
3789 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3791 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
3794 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3796 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
3799 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
3801 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
3804 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
3806 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
3809 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
3811 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
3814 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
3816 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
3819 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
3821 return IfThenElse(x > y, x, y);
3824 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
3826 return IfThenElse(x < y, x, y);
3829 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
3831 return Min(Max(x, min), max);
3834 RValue<Int> RoundInt(RValue<Float> cast)
3836 return x86::cvtss2si(cast);
3838 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
3841 Type *Int::getType()
3843 return T(llvm::Type::getInt32Ty(*::context));
3846 Long::Long(RValue<Int> cast)
3848 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
3850 storeValue(integer);
3853 Long::Long(RValue<UInt> cast)
3855 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
3857 storeValue(integer);
3860 Long::Long(RValue<Long> rhs)
3862 storeValue(rhs.value);
3865 RValue<Long> Long::operator=(int64_t rhs)
3867 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
3870 RValue<Long> Long::operator=(RValue<Long> rhs)
3872 storeValue(rhs.value);
3877 RValue<Long> Long::operator=(const Long &rhs)
3879 Value *value = rhs.loadValue();
3882 return RValue<Long>(value);
3885 RValue<Long> Long::operator=(const Reference<Long> &rhs)
3887 Value *value = rhs.loadValue();
3890 return RValue<Long>(value);
3893 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
3895 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
3898 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
3900 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
3903 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
3905 return lhs = lhs + rhs;
3908 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
3910 return lhs = lhs - rhs;
3913 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
3915 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
3918 Type *Long::getType()
3920 return T(llvm::Type::getInt64Ty(*::context));
3923 UInt::UInt(Argument<UInt> argument)
3925 storeValue(argument.value);
3928 UInt::UInt(RValue<UShort> cast)
3930 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
3932 storeValue(integer);
3935 UInt::UInt(RValue<Long> cast)
3937 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
3939 storeValue(integer);
3942 UInt::UInt(RValue<Float> cast)
3944 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
3945 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
3947 // Smallest positive value representable in UInt, but not in Int
3948 const unsigned int ustart = 0x80000000u;
3949 const float ustartf = float(ustart);
3951 // If the value is negative, store 0, otherwise store the result of the conversion
3952 storeValue((~(As<Int>(cast) >> 31) &
3953 // Check if the value can be represented as an Int
3954 IfThenElse(cast >= ustartf,
3955 // If the value is too large, subtract ustart and re-add it after conversion.
3956 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
3957 // Otherwise, just convert normally
3963 storeValue(Nucleus::createConstantInt(x));
3966 UInt::UInt(unsigned int x)
3968 storeValue(Nucleus::createConstantInt(x));
3971 UInt::UInt(RValue<UInt> rhs)
3973 storeValue(rhs.value);
3976 UInt::UInt(RValue<Int> rhs)
3978 storeValue(rhs.value);
3981 UInt::UInt(const UInt &rhs)
3983 Value *value = rhs.loadValue();
3987 UInt::UInt(const Reference<UInt> &rhs)
3989 Value *value = rhs.loadValue();
3993 UInt::UInt(const Int &rhs)
3995 Value *value = rhs.loadValue();
3999 UInt::UInt(const Reference<Int> &rhs)
4001 Value *value = rhs.loadValue();
4005 RValue<UInt> UInt::operator=(unsigned int rhs)
4007 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4010 RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4012 storeValue(rhs.value);
4017 RValue<UInt> UInt::operator=(RValue<Int> rhs)
4019 storeValue(rhs.value);
4021 return RValue<UInt>(rhs);
4024 RValue<UInt> UInt::operator=(const UInt &rhs)
4026 Value *value = rhs.loadValue();
4029 return RValue<UInt>(value);
4032 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4034 Value *value = rhs.loadValue();
4037 return RValue<UInt>(value);
4040 RValue<UInt> UInt::operator=(const Int &rhs)
4042 Value *value = rhs.loadValue();
4045 return RValue<UInt>(value);
4048 RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4050 Value *value = rhs.loadValue();
4053 return RValue<UInt>(value);
4056 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4058 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4061 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4063 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4066 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4068 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4071 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4073 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4076 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4078 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4081 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4083 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4086 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4088 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4091 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4093 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4096 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4098 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4101 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4103 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4106 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4108 return lhs = lhs + rhs;
4111 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4113 return lhs = lhs - rhs;
4116 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4118 return lhs = lhs * rhs;
4121 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4123 return lhs = lhs / rhs;
4126 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4128 return lhs = lhs % rhs;
4131 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4133 return lhs = lhs & rhs;
4136 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4138 return lhs = lhs | rhs;
4141 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4143 return lhs = lhs ^ rhs;
4146 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4148 return lhs = lhs << rhs;
4151 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4153 return lhs = lhs >> rhs;
4156 RValue<UInt> operator+(RValue<UInt> val)
4161 RValue<UInt> operator-(RValue<UInt> val)
4163 return RValue<UInt>(Nucleus::createNeg(val.value));
4166 RValue<UInt> operator~(RValue<UInt> val)
4168 return RValue<UInt>(Nucleus::createNot(val.value));
4171 RValue<UInt> operator++(UInt &val, int) // Post-increment
4173 RValue<UInt> res = val;
4175 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
4176 val.storeValue(inc);
4181 const UInt &operator++(UInt &val) // Pre-increment
4183 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
4184 val.storeValue(inc);
4189 RValue<UInt> operator--(UInt &val, int) // Post-decrement
4191 RValue<UInt> res = val;
4193 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4194 val.storeValue(inc);
4199 const UInt &operator--(UInt &val) // Pre-decrement
4201 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4202 val.storeValue(inc);
4207 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4209 return IfThenElse(x > y, x, y);
4212 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4214 return IfThenElse(x < y, x, y);
4217 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4219 return Min(Max(x, min), max);
4222 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4224 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4227 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4229 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4232 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4234 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4237 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4239 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4242 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4244 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4247 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4249 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4252 // RValue<UInt> RoundUInt(RValue<Float> cast)
4254 // return x86::cvtss2si(val); // FIXME: Unsigned
4256 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4259 Type *UInt::getType()
4261 return T(llvm::Type::getInt32Ty(*::context));
4264 // Int2::Int2(RValue<Int> cast)
4266 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4267 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4269 // int shuffle[2] = {0, 0};
4270 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4272 // storeValue(replicate);
4275 Int2::Int2(RValue<Int4> cast)
4277 Value *long2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
4278 Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0);
4279 Value *int2 = Nucleus::createBitCast(element, Int2::getType());
4284 Int2::Int2(int x, int y)
4286 int64_t constantVector[2] = {x, y};
4287 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Int::getType(), 2))));
4289 storeValue(Nucleus::createBitCast(vector, getType()));
4292 Int2::Int2(RValue<Int2> rhs)
4294 storeValue(rhs.value);
4297 Int2::Int2(const Int2 &rhs)
4299 Value *value = rhs.loadValue();
4303 Int2::Int2(const Reference<Int2> &rhs)
4305 Value *value = rhs.loadValue();
4309 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4311 if(CPUID::supportsMMX2())
4315 // punpckldq mm0, mm1
4317 Value *loLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), lo.value, 0);
4318 loLong = Nucleus::createInsertElement(loLong, V(ConstantInt::get(Int::getType(), 0)), 1);
4319 Value *hiLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), hi.value, 0);
4320 hiLong = Nucleus::createInsertElement(hiLong, V(ConstantInt::get(Int::getType(), 0)), 1);
4322 storeValue(As<Int2>(UnpackLow(As<Int2>(loLong), As<Int2>(hiLong))).value);
4326 int shuffle[2] = {0, 1};
4327 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), shuffle);
4329 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4333 RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4335 storeValue(rhs.value);
4340 RValue<Int2> Int2::operator=(const Int2 &rhs)
4342 Value *value = rhs.loadValue();
4345 return RValue<Int2>(value);
4348 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4350 Value *value = rhs.loadValue();
4353 return RValue<Int2>(value);
4356 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4358 if(CPUID::supportsMMX2())
4360 return x86::paddd(lhs, rhs);
4364 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4368 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4370 if(CPUID::supportsMMX2())
4372 return x86::psubd(lhs, rhs);
4376 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4380 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4382 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4385 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4387 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4390 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4392 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4395 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4397 if(CPUID::supportsMMX2())
4399 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4403 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4407 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4409 if(CPUID::supportsMMX2())
4411 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4415 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4419 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4421 if(CPUID::supportsMMX2())
4423 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4427 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4431 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4433 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4435 return x86::pslld(lhs, rhs);
4438 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4440 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4442 return x86::psrad(lhs, rhs);
4445 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4447 return lhs = lhs + rhs;
4450 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4452 return lhs = lhs - rhs;
4455 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4457 // return lhs = lhs * rhs;
4460 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4462 // return lhs = lhs / rhs;
4465 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4467 // return lhs = lhs % rhs;
4470 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4472 return lhs = lhs & rhs;
4475 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4477 return lhs = lhs | rhs;
4480 RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4482 return lhs = lhs ^ rhs;
4485 RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4487 return lhs = lhs << rhs;
4490 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4492 return lhs = lhs >> rhs;
4495 // RValue<Int2> operator+(RValue<Int2> val)
4500 // RValue<Int2> operator-(RValue<Int2> val)
4502 // return RValue<Int2>(Nucleus::createNeg(val.value));
4505 RValue<Int2> operator~(RValue<Int2> val)
4507 if(CPUID::supportsMMX2())
4509 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4513 return RValue<Int2>(Nucleus::createNot(val.value));
4517 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4519 if(CPUID::supportsMMX2())
4521 return x86::punpckldq(x, y);
4525 int shuffle[2] = {0, 2};
4526 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4528 return As<Short4>(packed);
4532 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4534 if(CPUID::supportsMMX2())
4536 return x86::punpckhdq(x, y);
4540 int shuffle[2] = {1, 3};
4541 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4543 return As<Short4>(packed);
4547 RValue<Int> Extract(RValue<Int2> val, int i)
4549 if(false) // FIXME: LLVM does not generate optimal code
4551 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
4557 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), Int::getType(), 0));
4561 Int2 val2 = As<Int2>(UnpackHigh(val, val));
4563 return Extract(val2, 0);
4568 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4570 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType()));
4573 Type *Int2::getType()
4575 if(CPUID::supportsMMX2())
4577 return MMX::getType();
4581 return T(VectorType::get(Int::getType(), 2));
4585 UInt2::UInt2(unsigned int x, unsigned int y)
4587 int64_t constantVector[2] = {x, y};
4588 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UInt::getType(), 2))));
4590 storeValue(Nucleus::createBitCast(vector, getType()));
// Constructs from an rvalue by storing its value.
4593 UInt2::UInt2(RValue<UInt2> rhs)
4595 storeValue(rhs.value);
// Copy constructor; reads the source variable's current value with loadValue().
4598 UInt2::UInt2(const UInt2 &rhs)
4600 Value *value = rhs.loadValue();
// Constructs from a Reference<UInt2>; reads the referenced value with loadValue().
4604 UInt2::UInt2(const Reference<UInt2> &rhs)
4606 Value *value = rhs.loadValue();
// Assigns an rvalue by storing its value into this variable.
4610 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4612 storeValue(rhs.value);
// Assigns from another UInt2 variable; the loaded value is what is returned.
4617 RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4619 Value *value = rhs.loadValue();
4622 return RValue<UInt2>(value);
// Assigns from a Reference<UInt2>; the loaded value is what is returned.
4625 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4627 Value *value = rhs.loadValue();
4630 return RValue<UInt2>(value);
// Element-wise addition. With MMX2 the operands are reinterpreted as Int2 and
// added with x86::paddd; the other visible path uses Nucleus::createAdd.
4633 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4635 if(CPUID::supportsMMX2())
4637 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
4641 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
// Element-wise subtraction. With MMX2 the operands are reinterpreted as Int2
// and subtracted with x86::psubd; the other visible path uses Nucleus::createSub.
4645 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4647 if(CPUID::supportsMMX2())
4649 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
4653 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4657 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4659 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4662 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4664 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4667 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4669 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
// Bitwise AND. With MMX2 the operands are reinterpreted as Short4 and passed
// to x86::pand; the other visible path uses Nucleus::createAnd.
4672 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4674 if(CPUID::supportsMMX2())
4676 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4680 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
// Bitwise OR. With MMX2 the operands are reinterpreted as Short4 and passed
// to x86::por; the other visible path uses Nucleus::createOr.
4684 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4686 if(CPUID::supportsMMX2())
4688 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4692 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
// Bitwise XOR. With MMX2 the operands are reinterpreted as Short4 and passed
// to x86::pxor; the other visible path uses Nucleus::createXor.
4696 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4698 if(CPUID::supportsMMX2())
4700 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4704 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts both elements left by the same count, using x86::pslld on the value
// reinterpreted as Int2.
4708 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4710 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4712 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
// Shifts both elements right by the same count, using x86::psrld.
4715 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4717 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4719 return x86::psrld(lhs, rhs);
// Compound assignment operators for UInt2. Each one applies the matching
// binary operator to (lhs, rhs), assigns the result to lhs and returns it.
// The *=, /= and %= forms are commented out.
4722 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4724 return lhs = lhs + rhs;
4727 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4729 return lhs = lhs - rhs;
4732 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4734 // return lhs = lhs * rhs;
4737 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4739 // return lhs = lhs / rhs;
4742 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4744 // return lhs = lhs % rhs;
4747 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4749 return lhs = lhs & rhs;
4752 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4754 return lhs = lhs | rhs;
4757 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4759 return lhs = lhs ^ rhs;
4762 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4764 return lhs = lhs << rhs;
4767 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4769 return lhs = lhs >> rhs;
4772 // RValue<UInt2> operator+(RValue<UInt2> val)
4777 // RValue<UInt2> operator-(RValue<UInt2> val)
4779 // return RValue<UInt2>(Nucleus::createNeg(val.value));
// Bitwise complement of a UInt2. When CPUID::supportsMMX2() is true the result
// is formed by XOR-ing with an all-ones constant; the other visible path uses
// Nucleus::createNot.
4782 RValue<UInt2> operator~(RValue<UInt2> val)
4784 if(CPUID::supportsMMX2())
4786 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
4790 return RValue<UInt2>(Nucleus::createNot(val.value));
// Type backing UInt2: MMX::getType() when MMX2 is supported; the other visible
// path returns a 2-element vector of UInt.
4794 Type *UInt2::getType()
4796 if(CPUID::supportsMMX2())
4798 return MMX::getType();
4802 return T(VectorType::get(UInt::getType(), 2));
// Widens the four bytes of a Byte4 to four 32-bit ints by zero extension.
// The packed bytes are bitcast to one Int and placed in element 0 of an Int4.
// With SSE4.1, x86::pmovzxbd performs the widening. The other visible path
// interleaves with zero vectors twice (bytes to shorts, then shorts to ints).
4806 Int4::Int4(RValue<Byte4> cast)
4808 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4809 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
4813 if (CPUID::supportsSSE4_1())
4815 e = x86::pmovzxbd(RValue<Int4>(a)).value;
4819 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4820 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4821 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4823 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4824 Value *d = Nucleus::createBitCast(c, Short8::getType());
4825 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4828 Value *f = Nucleus::createBitCast(e, Int4::getType());
// Widens the four bytes of an SByte4 to four 32-bit ints by sign extension.
// With SSE4.1, x86::pmovsxbd performs the widening. The other visible path
// duplicates every byte twice with two shuffles, so each source byte lands in
// the top byte of its 32-bit element, then shifts right by 24 with x86::psrad.
4832 Int4::Int4(RValue<SByte4> cast)
4834 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4835 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
4839 if (CPUID::supportsSSE4_1())
4841 g = x86::pmovsxbd(RValue<Int4>(a)).value;
4845 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4846 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4847 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4849 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4850 Value *d = Nucleus::createBitCast(c, Short8::getType());
4851 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
4853 Value *f = Nucleus::createBitCast(e, Int4::getType());
4854 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
4855 g = x86::psrad(RValue<Int4>(f), 24).value;
// Converts each float element to a signed 32-bit int with Nucleus::createFPToSI.
4861 Int4::Int4(RValue<Float4> cast)
4863 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
// Widens the four shorts of a Short4 to four 32-bit ints by sign extension.
// The 64-bit source is placed in element 0 of a 2 x Long vector and viewed as
// Int4. With SSE4.1, x86::pmovsxwd performs the widening. The other visible
// path duplicates each short into both halves of a 32-bit element; the
// comments below describe the shift that completes the sign extension.
4868 Int4::Int4(RValue<Short4> cast)
4870 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
4871 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
4872 long2 = Nucleus::createInsertElement(long2, element, 0);
4873 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
4875 if(CPUID::supportsSSE4_1())
4877 storeValue(x86::pmovsxwd(vector).value);
4881 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
4883 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4884 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4885 Value *d = Nucleus::createBitCast(c, Int4::getType());
4888 // Each Short is packed into each Int in the (Short | Short) format.
4889 // Shifting by 16 will retrieve the original Short value.
4890 // Shifting an Int will propagate the sign bit, which will work
4891 // for both positive and negative values of a Short.
// Widens the four shorts of a UShort4 to four 32-bit ints by zero extension.
// With SSE4.1, x86::pmovzxwd performs the widening. The other visible path
// interleaves the shorts with a zero vector.
4896 Int4::Int4(RValue<UShort4> cast)
4898 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
4899 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
4900 long2 = Nucleus::createInsertElement(long2, element, 0);
4901 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
4903 if(CPUID::supportsSSE4_1())
4905 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
4909 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
4911 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4912 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle);
4913 Value *d = Nucleus::createBitCast(c, Int4::getType());
// Constant constructors. When fewer than four values are given, the last one
// is repeated for the remaining elements.
4918 Int4::Int4(int xyzw)
4920 constant(xyzw, xyzw, xyzw, xyzw);
4923 Int4::Int4(int x, int yzw)
4925 constant(x, yzw, yzw, yzw);
4928 Int4::Int4(int x, int y, int zw)
4930 constant(x, y, zw, zw);
4933 Int4::Int4(int x, int y, int z, int w)
4935 constant(x, y, z, w);
// Stores the constant vector {x, y, z, w} into this variable.
4938 void Int4::constant(int x, int y, int z, int w)
4940 int64_t constantVector[4] = {x, y, z, w};
4941 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs from an Int4 rvalue by storing its value.
4944 Int4::Int4(RValue<Int4> rhs)
4946 storeValue(rhs.value);
// Copy constructor; reads the source variable's current value with loadValue().
4949 Int4::Int4(const Int4 &rhs)
4951 Value *value = rhs.loadValue();
// Constructs from a Reference<Int4>; reads the referenced value with loadValue().
4955 Int4::Int4(const Reference<Int4> &rhs)
4957 Value *value = rhs.loadValue();
// Constructs from a UInt4 rvalue; the value is stored as-is with no conversion step.
4961 Int4::Int4(RValue<UInt4> rhs)
4963 storeValue(rhs.value);
// Constructs from a UInt4 variable; reads its current value with loadValue().
4966 Int4::Int4(const UInt4 &rhs)
4968 Value *value = rhs.loadValue();
// Constructs from a Reference<UInt4>; reads the referenced value with loadValue().
4972 Int4::Int4(const Reference<UInt4> &rhs)
4974 Value *value = rhs.loadValue();
// Concatenates two Int2 halves. Each half is bitcast to a Long, 'lo' goes into
// element 0 and 'hi' into element 1 of a 2 x Long vector, and the vector is
// bitcast to the Int4 type.
4978 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
4980 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
4981 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
4983 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
4984 long2 = Nucleus::createInsertElement(long2, loLong, 0);
4985 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
4986 Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
// Broadcasts a scalar to all four elements. rhs is inserted into element 0 of
// this variable's current vector, then a {0, 0, 0, 0} shuffle replicates it.
4991 Int4::Int4(RValue<Int> rhs)
4993 Value *vector = loadValue();
4994 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
4996 int swizzle[4] = {0, 0, 0, 0};
4997 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
4999 storeValue(replicate);
// Broadcast from an Int variable: loads its value and assigns it as an RValue<Int>.
5002 Int4::Int4(const Int &rhs)
5004 *this = RValue<Int>(rhs.loadValue());
// Broadcast from a Reference<Int>: loads the referenced value and assigns it
// as an RValue<Int>.
5007 Int4::Int4(const Reference<Int> &rhs)
5009 *this = RValue<Int>(rhs.loadValue());
// Assigns an rvalue by storing its value into this variable.
5012 RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5014 storeValue(rhs.value);
// Assigns from another Int4 variable; the loaded value is what is returned.
5019 RValue<Int4> Int4::operator=(const Int4 &rhs)
5021 Value *value = rhs.loadValue();
5024 return RValue<Int4>(value);
// Assigns from a Reference<Int4>; the loaded value is what is returned.
5027 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5029 Value *value = rhs.loadValue();
5032 return RValue<Int4>(value);
// Element-wise Int4 arithmetic. Each operator forwards to the matching Nucleus
// builder; division and remainder use the signed forms (SDiv, SRem).
5035 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5037 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5040 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5042 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5045 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5047 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5050 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5052 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5055 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5057 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
// Bitwise AND, OR and XOR on Int4, each forwarding to the matching Nucleus builder.
5060 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5062 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5065 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5067 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5070 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5072 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts every element left by the same count, using x86::pslld.
5075 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5077 return x86::pslld(lhs, rhs);
// Shifts every element right by the same count, using x86::psrad.
5080 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5082 return x86::psrad(lhs, rhs);
// Per-element left shift, with the counts taken from the vector rhs.
5085 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5087 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
// Per-element right shift (AShr), with the counts taken from the vector rhs.
5090 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5092 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators for Int4. Each one applies the matching binary
// operator to (lhs, rhs), assigns the result to lhs and returns it.
// The /= and %= forms are commented out.
5095 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5097 return lhs = lhs + rhs;
5100 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5102 return lhs = lhs - rhs;
5105 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5107 return lhs = lhs * rhs;
5110 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5112 // return lhs = lhs / rhs;
5115 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5117 // return lhs = lhs % rhs;
5120 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5122 return lhs = lhs & rhs;
5125 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5127 return lhs = lhs | rhs;
5130 RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5132 return lhs = lhs ^ rhs;
5135 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5137 return lhs = lhs << rhs;
5140 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5142 return lhs = lhs >> rhs;
// Unary plus.
5145 RValue<Int4> operator+(RValue<Int4> val)
// Unary minus: element-wise negation via Nucleus::createNeg.
5150 RValue<Int4> operator-(RValue<Int4> val)
5152 return RValue<Int4>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
5155 RValue<Int4> operator~(RValue<Int4> val)
5157 return RValue<Int4>(Nucleus::createNot(val.value));
// Signed per-element comparisons on Int4. Each returns the integer compare
// result sign-extended to Int4. CmpEQ, CmpLE and CmpNLT work around the LLVM
// issue in the FIXMEs below: they compute the opposite predicate and invert
// it by XOR-ing with all ones.

// x == y, computed as the complement of (x != y).
5160 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5162 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5163 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5164 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5165 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// x < y (signed).
5168 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5170 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
// x <= y (signed), computed as the complement of (x > y).
5173 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5175 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5176 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5177 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5178 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// x != y.
5181 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5183 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// not (x < y), i.e. x >= y (signed), computed as the complement of (x < y).
5186 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5188 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5189 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5190 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5191 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// not (x <= y), i.e. x > y (signed).
5194 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5196 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
// Element-wise signed maximum. Uses x86::pmaxsd with SSE4.1; the other visible
// path selects with the CmpNLE mask: x where x > y, y elsewhere.
5199 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5201 if(CPUID::supportsSSE4_1())
5203 return x86::pmaxsd(x, y);
5207 RValue<Int4> greater = CmpNLE(x, y);
5208 return x & greater | y & ~greater;
// Element-wise signed minimum. Uses x86::pminsd with SSE4.1; the other visible
// path selects with the CmpLT mask: x where x < y, y elsewhere.
5212 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5214 if(CPUID::supportsSSE4_1())
5216 return x86::pminsd(x, y);
5220 RValue<Int4> less = CmpLT(x, y);
5221 return x & less | y & ~less;
// Converts four floats to four ints using x86::cvtps2dq.
5225 RValue<Int4> RoundInt(RValue<Float4> cast)
5227 return x86::cvtps2dq(cast);
// Packs two Int4 vectors into one Short8 using x86::packssdw.
5230 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5232 return x86::packssdw(x, y);
// Returns element i of x as an Int.
5235 RValue<Int> Extract(RValue<Int4> x, int i)
5237 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
// Returns a copy of x with element i replaced by 'element'.
5240 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5242 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
// Returns the result of x86::movmskps applied to x reinterpreted as Float4.
5245 RValue<Int> SignMask(RValue<Int4> x)
5247 return x86::movmskps(As<Float4>(x));
// Rearranges the elements of x according to 'select'; delegates to createSwizzle4.
5250 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5252 return RValue<Int4>(createSwizzle4(x.value, select));
// Type backing Int4: a 4-element vector of Int.
5255 Type *Int4::getType()
5257 return T(VectorType::get(Int::getType(), 4));
// Converts four floats to four unsigned 32-bit ints using only the signed
// conversion Int4(Float4). Elements >= 2^31 are converted as (value - 2^31)
// and have 0x80000000 added back afterwards. Elements whose float sign bit is
// set produce 0.
5260 UInt4::UInt4(RValue<Float4> cast)
5262 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
5263 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5265 // Smallest positive value representable in UInt, but not in Int
5266 const unsigned int ustart = 0x80000000u;
5267 const float ustartf = float(ustart);
5269 // Check if the value can be represented as an Int
5270 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5271 // If the value is too large, subtract ustart and re-add it after conversion.
5272 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5273 // Otherwise, just convert normally
5274 (~uiValue & Int4(cast));
5275 // If the value is negative, store 0, otherwise store the result of the conversion
5276 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
// Constant constructors. When fewer than four values are given, the last one
// is repeated for the remaining elements.
5279 UInt4::UInt4(int xyzw)
5281 constant(xyzw, xyzw, xyzw, xyzw);
5284 UInt4::UInt4(int x, int yzw)
5286 constant(x, yzw, yzw, yzw);
5289 UInt4::UInt4(int x, int y, int zw)
5291 constant(x, y, zw, zw);
5294 UInt4::UInt4(int x, int y, int z, int w)
5296 constant(x, y, z, w);
// Stores the constant vector {x, y, z, w} into this variable.
5299 void UInt4::constant(int x, int y, int z, int w)
5301 int64_t constantVector[4] = {x, y, z, w};
5302 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs from a UInt4 rvalue by storing its value.
5305 UInt4::UInt4(RValue<UInt4> rhs)
5307 storeValue(rhs.value);
// Copy constructor; reads the source variable's current value with loadValue().
5310 UInt4::UInt4(const UInt4 &rhs)
5312 Value *value = rhs.loadValue();
// Constructs from a Reference<UInt4>; reads the referenced value with loadValue().
5316 UInt4::UInt4(const Reference<UInt4> &rhs)
5318 Value *value = rhs.loadValue();
// Constructs from an Int4 rvalue; the value is stored as-is with no conversion step.
5322 UInt4::UInt4(RValue<Int4> rhs)
5324 storeValue(rhs.value);
// Constructs from an Int4 variable; reads its current value with loadValue().
5327 UInt4::UInt4(const Int4 &rhs)
5329 Value *value = rhs.loadValue();
// Constructs from a Reference<Int4>; reads the referenced value with loadValue().
5333 UInt4::UInt4(const Reference<Int4> &rhs)
5335 Value *value = rhs.loadValue();
// Concatenates two UInt2 halves. Each half is bitcast to a Long, 'lo' goes
// into element 0 and 'hi' into element 1 of a 2 x Long vector.
// Note that the final bitcast targets Int4::getType(), not UInt4::getType().
5339 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5341 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5342 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5344 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5345 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5346 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5347 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
// Assigns an rvalue by storing its value into this variable.
5352 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5354 storeValue(rhs.value);
// Assigns from another UInt4 variable; the loaded value is what is returned.
5359 RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5361 Value *value = rhs.loadValue();
5364 return RValue<UInt4>(value);
// Assigns from a Reference<UInt4>; the loaded value is what is returned.
5367 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5369 Value *value = rhs.loadValue();
5372 return RValue<UInt4>(value);
// Element-wise UInt4 arithmetic. Each operator forwards to the matching Nucleus
// builder; division and remainder use the unsigned forms (UDiv, URem).
5375 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5377 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5380 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5382 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5385 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5387 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5390 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5392 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5395 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5397 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
// Bitwise AND, OR and XOR on UInt4, each forwarding to the matching Nucleus builder.
5400 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5402 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5405 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5407 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5410 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5412 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts every element left by the same count, using x86::pslld on the value
// reinterpreted as Int4.
5415 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5417 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
// Shifts every element right by the same count, using x86::psrld.
5420 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5422 return x86::psrld(lhs, rhs);
// Per-element left shift, with the counts taken from the vector rhs.
5425 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5427 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
// Per-element right shift (LShr), with the counts taken from the vector rhs.
5430 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5432 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for UInt4. Each one applies the matching binary
// operator to (lhs, rhs), assigns the result to lhs and returns it.
// The /= and %= forms are commented out.
5435 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5437 return lhs = lhs + rhs;
5440 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5442 return lhs = lhs - rhs;
5445 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5447 return lhs = lhs * rhs;
5450 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5452 // return lhs = lhs / rhs;
5455 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5457 // return lhs = lhs % rhs;
5460 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5462 return lhs = lhs & rhs;
5465 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5467 return lhs = lhs | rhs;
5470 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5472 return lhs = lhs ^ rhs;
5475 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5477 return lhs = lhs << rhs;
5480 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5482 return lhs = lhs >> rhs;
// Unary plus.
5485 RValue<UInt4> operator+(RValue<UInt4> val)
// Unary minus: element-wise negation via Nucleus::createNeg.
5490 RValue<UInt4> operator-(RValue<UInt4> val)
5492 return RValue<UInt4>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
5495 RValue<UInt4> operator~(RValue<UInt4> val)
5497 return RValue<UInt4>(Nucleus::createNot(val.value));
// Unsigned per-element comparisons on UInt4. Each returns the integer compare
// result sign-extended to the Int4 vector type. CmpEQ, CmpLE and CmpNLT work
// around the LLVM issue in the FIXMEs below: they compute the opposite
// predicate and invert it by XOR-ing with all ones.

// x == y, computed as the complement of (x != y).
5500 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5502 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5503 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5504 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5505 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// x < y (unsigned).
5508 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5510 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
// x <= y (unsigned), computed as the complement of (x > y).
5513 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5515 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5516 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5517 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5518 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// x != y.
5521 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5523 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// not (x < y), i.e. x >= y (unsigned), computed as the complement of (x < y).
5526 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5528 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5529 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5530 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5531 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// not (x <= y), i.e. x > y (unsigned).
5534 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5536 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
// Element-wise unsigned maximum. Uses x86::pmaxud with SSE4.1; the other
// visible path selects with the CmpNLE mask: x where x > y, y elsewhere.
5539 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5541 if(CPUID::supportsSSE4_1())
5543 return x86::pmaxud(x, y);
5547 RValue<UInt4> greater = CmpNLE(x, y);
5548 return x & greater | y & ~greater;
// Element-wise unsigned minimum. Uses x86::pminud with SSE4.1; the other
// visible path selects with the CmpLT mask: x where x < y, y elsewhere.
5552 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5554 if(CPUID::supportsSSE4_1())
5556 return x86::pminud(x, y);
5560 RValue<UInt4> less = CmpLT(x, y);
5561 return x & less | y & ~less;
// Packs two UInt4 vectors into one UShort8 using x86::packusdw, with the
// operands reinterpreted as Int4.
5565 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5567 return x86::packusdw(As<Int4>(x), As<Int4>(y));
// Type backing UInt4: a 4-element vector of UInt.
5570 Type *UInt4::getType()
5572 return T(VectorType::get(UInt::getType(), 4));
// Converts a signed Int to Float (SIToFP) and stores the result.
5575 Float::Float(RValue<Int> cast)
5577 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5579 storeValue(integer);
// Constructs a Float holding the constant x.
5582 Float::Float(float x)
5584 storeValue(Nucleus::createConstantFloat(x));
// Constructs from an rvalue by storing its value.
5587 Float::Float(RValue<Float> rhs)
5589 storeValue(rhs.value);
// Copy constructor; reads the source variable's current value with loadValue().
5592 Float::Float(const Float &rhs)
5594 Value *value = rhs.loadValue();
// Constructs from a Reference<Float>; reads the referenced value with loadValue().
5598 Float::Float(const Reference<Float> &rhs)
5600 Value *value = rhs.loadValue();
// Assigns an rvalue by storing its value into this variable.
5604 RValue<Float> Float::operator=(RValue<Float> rhs)
5606 storeValue(rhs.value);
// Assigns from another Float variable; the loaded value is what is returned.
5611 RValue<Float> Float::operator=(const Float &rhs)
5613 Value *value = rhs.loadValue();
5616 return RValue<Float>(value);
// Assigns from a Reference<Float>; the loaded value is what is returned.
5619 RValue<Float> Float::operator=(const Reference<Float> &rhs)
5621 Value *value = rhs.loadValue();
5624 return RValue<Float>(value);
// Scalar float arithmetic, each forwarding to the matching Nucleus floating-point
// builder (FAdd, FSub, FMul, FDiv).
5627 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5629 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5632 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5634 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5637 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5639 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5642 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5644 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
// Compound assignment operators for Float. Each one applies the matching binary
// operator to (lhs, rhs), assigns the result to lhs and returns it.
5647 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5649 return lhs = lhs + rhs;
5652 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5654 return lhs = lhs - rhs;
5657 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5659 return lhs = lhs * rhs;
5662 RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5664 return lhs = lhs / rhs;
// Unary plus.
5667 RValue<Float> operator+(RValue<Float> val)
// Unary minus: negation via Nucleus::createFNeg.
5672 RValue<Float> operator-(RValue<Float> val)
5674 return RValue<Float>(Nucleus::createFNeg(val.value));
// Scalar float comparisons returning Bool. All six use the FCmpO* builders
// (OLT, OLE, OGT, OGE, ONE, OEQ), including operator!=.
5677 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5679 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5682 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5684 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5687 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5689 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5692 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5694 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5697 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5699 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5702 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5704 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
// Absolute value: x when x > 0, otherwise -x. Inputs for which x > 0 is false,
// zero included, take the negated branch.
5707 RValue<Float> Abs(RValue<Float> x)
5709 return IfThenElse(x > 0.0f, x, -x);
// Larger of x and y; returns y whenever x > y is false.
5712 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5714 return IfThenElse(x > y, x, y);
// Smaller of x and y; returns y whenever x < y is false.
5717 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5719 return IfThenElse(x < y, x, y);
// Approximate reciprocal of x based on x86::rcpss.
// One path rescales the rcpss result by 1 / rcp(1.0f), with that factor computed
// on the host via _mm_rcp_ss; the other returns the raw rcpss result.
// NOTE(review): presumably the rescaled path is taken when exactAtPow2 is true -
// confirm against the branch condition.
5722 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5726 // rcpss uses a piecewise-linear approximation which minimizes the relative error
5727 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
5728 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5732 return x86::rcpss(x);
// Reciprocal square root of x, computed with x86::rsqrtss.
5736 RValue<Float> RcpSqrt_pp(RValue<Float> x)
5738 return x86::rsqrtss(x);
// Square root of x, computed with x86::sqrtss.
5741 RValue<Float> Sqrt(RValue<Float> x)
5743 return x86::sqrtss(x);
// Rounds x. With SSE4.1 this is x86::roundss with mode 0; the other visible
// path broadcasts x to a Float4, applies the Float4 Round and takes .x.
5746 RValue<Float> Round(RValue<Float> x)
5748 if(CPUID::supportsSSE4_1())
5750 return x86::roundss(x, 0);
5754 return Float4(Round(Float4(x))).x;
// Truncates x. With SSE4.1 this is x86::roundss with mode 3; the other visible
// path converts to Int and back to Float.
5758 RValue<Float> Trunc(RValue<Float> x)
5760 if(CPUID::supportsSSE4_1())
5762 return x86::roundss(x, 3);
5766 return Float(Int(x)); // Rounded toward zero
// Fractional part of x. With SSE4.1 this is x - floor(x) using x86::floorss;
// the other visible path broadcasts x to a Float4, applies the Float4 Frac and
// takes .x.
5770 RValue<Float> Frac(RValue<Float> x)
5772 if(CPUID::supportsSSE4_1())
5774 return x - x86::floorss(x);
5778 return Float4(Frac(Float4(x))).x;
// Floor of x. With SSE4.1 this is x86::floorss; the other visible path
// broadcasts x to a Float4, applies the Float4 Floor and takes .x.
5782 RValue<Float> Floor(RValue<Float> x)
5784 if(CPUID::supportsSSE4_1())
5786 return x86::floorss(x);
5790 return Float4(Floor(Float4(x))).x;
// Ceiling of x. With SSE4.1 this is x86::ceilss; the other visible path
// broadcasts x to a Float4, applies the Float4 Ceil and takes .x.
5794 RValue<Float> Ceil(RValue<Float> x)
5796 if(CPUID::supportsSSE4_1())
5798 return x86::ceilss(x);
5802 return Float4(Ceil(Float4(x))).x;
// Type backing Float: LLVM's float type obtained from the file-level ::context.
5806 Type *Float::getType()
5808 return T(llvm::Type::getFloatTy(*::context));
// Takes the low 64 bits of a Float4: the value is viewed as a 2 x Long vector,
// element 0 is extracted and bitcast to Float2's type.
5811 Float2::Float2(RValue<Float4> cast)
5813 Value *int64x2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
5814 Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
5815 Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
// Type backing Float2: a 2-element vector of Float.
5820 Type *Float2::getType()
5822 return T(VectorType::get(Float::getType(), 2));
// Converts four unsigned bytes to four floats.
// NOTE(review): three alternative implementations appear below - a direct
// vector UIToFP marked "Crashes", a per-element extract/convert/insert
// sequence, and a conversion that goes through Int4(cast) followed by SIToFP.
// Each declares its own 'xyzw', so they look mutually exclusive, presumably
// selected by preprocessor conditionals with the Int4 route active. Confirm
// before editing.
5825 Float4::Float4(RValue<Byte4> cast) : FloatXYZW(this)
5828 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
5830 Value *vector = loadValue();
5832 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
5833 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
5834 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
5836 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
5837 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
5838 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
5840 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
5841 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
5842 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
5844 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
5845 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
5846 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
5848 Value *a = Int4(cast).loadValue();
5849 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Converts four signed bytes to four floats.
// NOTE(review): three alternative implementations appear below - a direct
// vector SIToFP marked "Crashes", a per-element extract/convert/insert
// sequence, and a conversion that goes through Int4(cast) followed by SIToFP.
// Each declares its own 'xyzw', so they look mutually exclusive, presumably
// selected by preprocessor conditionals with the Int4 route active. Confirm
// before editing.
5855 Float4::Float4(RValue<SByte4> cast) : FloatXYZW(this)
5858 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
5860 Value *vector = loadValue();
5862 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
5863 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
5864 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
5866 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
5867 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
5868 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
5870 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
5871 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
5872 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
5874 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
5875 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
5876 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
5878 Value *a = Int4(cast).loadValue();
5879 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Converts a Short4 to floats: an Int4 value 'c' is converted with SIToFP.
// NOTE(review): 'c' is presumably an Int4 built from 'cast' - confirm.
5885 Float4::Float4(RValue<Short4> cast) : FloatXYZW(this)
5888 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Converts a UShort4 to floats: an Int4 value 'c' is converted with SIToFP.
// NOTE(review): 'c' is presumably an Int4 built from 'cast' - confirm.
5891 Float4::Float4(RValue<UShort4> cast) : FloatXYZW(this)
5894 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Converts each signed 32-bit int element to float with SIToFP.
5897 Float4::Float4(RValue<Int4> cast) : FloatXYZW(this)
5899 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
// Converts unsigned 32-bit ints to floats using only the signed conversion.
// The low 31 bits of each element are converted as a signed int. Where the top
// bit was set, the float 2^31 is added: the arithmetic shift by 31 yields an
// all-ones mask that selects the bit pattern of Float4(0x80000000u).
5904 Float4::Float4(RValue<UInt4> cast) : FloatXYZW(this)
5906 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
5907 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
5909 storeValue(result.value);
// Default constructor; no value is stored here.
5912 Float4::Float4() : FloatXYZW(this)
// Constant constructors. When fewer than four values are given, the last one
// is repeated for the remaining elements.
5916 Float4::Float4(float xyzw) : FloatXYZW(this)
5918 constant(xyzw, xyzw, xyzw, xyzw);
5921 Float4::Float4(float x, float yzw) : FloatXYZW(this)
5923 constant(x, yzw, yzw, yzw);
5926 Float4::Float4(float x, float y, float zw) : FloatXYZW(this)
5928 constant(x, y, zw, zw);
5931 Float4::Float4(float x, float y, float z, float w) : FloatXYZW(this)
5933 constant(x, y, z, w);
// Stores the constant vector {x, y, z, w}. The values are widened to double
// for Nucleus::createConstantVector.
5936 void Float4::constant(float x, float y, float z, float w)
5938 double constantVector[4] = {x, y, z, w};
5939 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs from an rvalue by storing its value.
5942 Float4::Float4(RValue<Float4> rhs) : FloatXYZW(this)
5944 storeValue(rhs.value);
// Copy constructor; reads the source variable's current value with loadValue().
5947 Float4::Float4(const Float4 &rhs) : FloatXYZW(this)
5949 Value *value = rhs.loadValue();
// Constructs from a Reference<Float4>; reads the referenced value with loadValue().
5953 Float4::Float4(const Reference<Float4> &rhs) : FloatXYZW(this)
5955 Value *value = rhs.loadValue();
// Broadcasts a scalar to all four elements. rhs is inserted into element 0 of
// this variable's current vector, then a {0, 0, 0, 0} shuffle replicates it.
5959 Float4::Float4(RValue<Float> rhs) : FloatXYZW(this)
5961 Value *vector = loadValue();
5962 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5964 int swizzle[4] = {0, 0, 0, 0};
5965 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5967 storeValue(replicate);
// Broadcast from a Float variable: loads its value and assigns it as an RValue<Float>.
5970 Float4::Float4(const Float &rhs) : FloatXYZW(this)
5972 *this = RValue<Float>(rhs.loadValue());
// Broadcast from a Reference<Float>: loads the referenced value and assigns it
// as an RValue<Float>.
5975 Float4::Float4(const Reference<Float> &rhs) : FloatXYZW(this)
5977 *this = RValue<Float>(rhs.loadValue());
// Assigns the constant (x, x, x, x) to this Float4 and returns the assigned value.
5980 RValue<Float4> Float4::operator=(float x)
5982 return *this = Float4(x, x, x, x);
// Stores rhs into this Float4.
// NOTE(review): the return statement is not visible in this excerpt.
5985 RValue<Float4> Float4::operator=(RValue<Float4> rhs)
5987 storeValue(rhs.value);
// Copy assignment: loads rhs and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this excerpt; confirm it sits between the load and the return.
5992 RValue<Float4> Float4::operator=(const Float4 &rhs)
5994 Value *value = rhs.loadValue();
5997 return RValue<Float4>(value);
// Assignment from a Reference<Float4>: loads the referenced value and returns it.
// NOTE(review): the store into this variable is not visible in this excerpt; confirm it sits between the load and the return.
6000 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6002 Value *value = rhs.loadValue();
6005 return RValue<Float4>(value);
// Assigns a scalar by constructing a temporary Float4 from it and copy-assigning that.
6008 RValue<Float4> Float4::operator=(RValue<Float> rhs)
6010 return *this = Float4(rhs);
// Assigns a Float variable by constructing a temporary Float4 from it and copy-assigning that.
6013 RValue<Float4> Float4::operator=(const Float &rhs)
6015 return *this = Float4(rhs);
// Assigns a Reference<Float> by constructing a temporary Float4 from it and copy-assigning that.
6018 RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6020 return *this = Float4(rhs);
// Floating-point addition of two Float4 values (Nucleus::createFAdd).
6023 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6025 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
// Floating-point subtraction of two Float4 values (Nucleus::createFSub).
6028 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6030 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
// Floating-point multiplication of two Float4 values (Nucleus::createFMul).
6033 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6035 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
// Floating-point division of two Float4 values (Nucleus::createFDiv).
6038 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6040 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
// Floating-point remainder of two Float4 values (Nucleus::createFRem).
6043 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6045 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
// Compound addition: stores lhs + rhs back into lhs and returns the result.
6048 RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6050 return lhs = lhs + rhs;
// Compound subtraction: stores lhs - rhs back into lhs and returns the result.
6053 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6055 return lhs = lhs - rhs;
// Compound multiplication: stores lhs * rhs back into lhs and returns the result.
6058 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6060 return lhs = lhs * rhs;
// Compound division: stores lhs / rhs back into lhs and returns the result.
6063 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6065 return lhs = lhs / rhs;
// Compound remainder: stores lhs % rhs back into lhs and returns the result.
6068 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6070 return lhs = lhs % rhs;
// Unary plus on a Float4.
// NOTE(review): the body is not visible in this excerpt — presumably returns val unchanged; confirm.
6073 RValue<Float4> operator+(RValue<Float4> val)
// Unary minus: floating-point negation of val (Nucleus::createFNeg).
6078 RValue<Float4> operator-(RValue<Float4> val)
6080 return RValue<Float4>(Nucleus::createFNeg(val.value));
6083 RValue<Float4> Abs(RValue<Float4> x)
6085 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6086 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6087 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6089 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
// Per-lane maximum of x and y, implemented with x86::maxps.
6092 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6094 return x86::maxps(x, y);
// Per-lane minimum of x and y, implemented with x86::minps.
6097 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6099 return x86::minps(x, y);
// Approximate reciprocal of x using x86::rcpps.
// One path scales the rcpps result by a constant computed on the host when this code is generated:
// 1 / rcp_ss(1.0f). The other path returns the raw rcpps result.
// NOTE(review): the condition selecting between the paths is not visible in this excerpt —
// presumably `exactAtPow2`; confirm.
6102 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6106 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6107 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6108 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6112 return x86::rcpps(x);
// Reciprocal square root of x, implemented with x86::rsqrtps.
6116 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6118 return x86::rsqrtps(x);
// Square root of x, implemented with x86::sqrtps.
6121 RValue<Float4> Sqrt(RValue<Float4> x)
6123 return x86::sqrtps(x);
// Returns val with lane i replaced by element.
6126 RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i)
6128 return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i));
// Returns lane i of x as a scalar Float.
6131 RValue<Float> Extract(RValue<Float4> x, int i)
6133 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
// Rearranges the lanes of x according to the selector byte; delegates to createSwizzle4.
6136 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6138 return RValue<Float4>(createSwizzle4(x.value, select));
6141 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6145 ((imm >> 0) & 0x03) + 0,
6146 ((imm >> 2) & 0x03) + 0,
6147 ((imm >> 4) & 0x03) + 4,
6148 ((imm >> 6) & 0x03) + 4,
6151 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6154 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6156 int shuffle[4] = {0, 4, 1, 5};
6157 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6160 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6162 int shuffle[4] = {2, 6, 3, 7};
6163 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6166 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6168 Value *vector = lhs.loadValue();
6169 Value *shuffle = createMask4(vector, rhs.value, select);
6170 lhs.storeValue(shuffle);
6172 return RValue<Float4>(shuffle);
// Returns the result of x86::movmskps on x as an Int.
6175 RValue<Int> SignMask(RValue<Float4> x)
6177 return x86::movmskps(x);
// Per-lane ordered-equal comparison (FCmpOEQ); the boolean result is sign-extended to an Int4.
6180 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6182 // return As<Int4>(x86::cmpeqps(x, y));
6183 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
// Per-lane ordered less-than comparison (FCmpOLT); the boolean result is sign-extended to an Int4.
6186 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6188 // return As<Int4>(x86::cmpltps(x, y));
6189 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
// Per-lane ordered less-or-equal comparison (FCmpOLE); the boolean result is sign-extended to an Int4.
6192 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6194 // return As<Int4>(x86::cmpleps(x, y));
6195 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
// Per-lane ordered not-equal comparison (FCmpONE); the boolean result is sign-extended to an Int4.
// NOTE(review): the commented-out cmpneqps alternative may treat NaN operands differently from FCmpONE — confirm before swapping.
6198 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6200 // return As<Int4>(x86::cmpneqps(x, y));
6201 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
// "Not less than", implemented as an ordered greater-or-equal comparison (FCmpOGE), sign-extended to an Int4.
// NOTE(review): the commented-out cmpnltps alternative may treat NaN operands differently from FCmpOGE — confirm before swapping.
6204 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6206 // return As<Int4>(x86::cmpnltps(x, y));
6207 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
// "Not less or equal", implemented as an ordered greater-than comparison (FCmpOGT), sign-extended to an Int4.
// NOTE(review): the commented-out cmpnleps alternative may treat NaN operands differently from FCmpOGT — confirm before swapping.
6210 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6212 // return As<Int4>(x86::cmpnleps(x, y));
6213 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6216 RValue<Float4> Round(RValue<Float4> x)
6218 if(CPUID::supportsSSE4_1())
6220 return x86::roundps(x, 0);
6224 return Float4(RoundInt(x));
6228 RValue<Float4> Trunc(RValue<Float4> x)
6230 if(CPUID::supportsSSE4_1())
6232 return x86::roundps(x, 3);
6236 return Float4(Int4(x)); // Rounded toward zero
6240 RValue<Float4> Frac(RValue<Float4> x)
6242 if(CPUID::supportsSSE4_1())
6244 return x - x86::floorps(x);
6248 Float4 frc = x - Float4(Int4(x)); // Signed fractional part
6250 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
// Rounds each lane of x down; uses x86::floorps when SSE4.1 is available.
// NOTE(review): the non-SSE4.1 fallback is not visible in this excerpt.
6254 RValue<Float4> Floor(RValue<Float4> x)
6256 if(CPUID::supportsSSE4_1())
6258 return x86::floorps(x);
// Rounds each lane of x up; uses x86::ceilps when SSE4.1 is available.
// NOTE(review): the non-SSE4.1 fallback is not visible in this excerpt.
6266 RValue<Float4> Ceil(RValue<Float4> x)
6268 if(CPUID::supportsSSE4_1())
6270 return x86::ceilps(x);
// Returns the type of Float4: a vector of four Float elements.
6278 Type *Float4::getType()
6280 return T(VectorType::get(Float::getType(), 4));
// Advances a byte pointer by a constant offset using a GEP over Byte elements.
6283 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6285 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
// Advances a byte pointer by a run-time Int offset using a GEP over Byte elements.
6288 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6290 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
// Advances a byte pointer by a run-time UInt offset using a GEP over Byte elements.
6293 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6295 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
// Advances lhs in place by a constant offset and returns the new pointer.
6298 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6300 return lhs = lhs + offset;
// Advances lhs in place by an Int offset and returns the new pointer.
6303 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6305 return lhs = lhs + offset;
// Advances lhs in place by a UInt offset and returns the new pointer.
6308 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6310 return lhs = lhs + offset;
// Moves a byte pointer back by a constant offset, implemented as adding the negated offset.
6313 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6315 return lhs + -offset;
// Moves a byte pointer back by an Int offset, implemented as adding the negated offset.
6318 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6320 return lhs + -offset;
// Moves a byte pointer back by a UInt offset, implemented as adding the negated offset.
6323 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6325 return lhs + -offset;
// Moves lhs back in place by a constant offset and returns the new pointer.
6328 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6330 return lhs = lhs - offset;
// Moves lhs back in place by an Int offset and returns the new pointer.
6333 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6335 return lhs = lhs - offset;
// Moves lhs back in place by a UInt offset and returns the new pointer.
6338 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6340 return lhs = lhs - offset;
// Emits a void return, then makes a newly created basic block the insertion point
// and emits an 'unreachable' into it.
// NOTE(review): the enclosing function header is not visible in this excerpt — presumably `void Return()`; confirm.
6345 Nucleus::createRetVoid();
6346 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6347 Nucleus::createUnreachable();
// Emits a return of the Int value ret, then makes a newly created basic block the
// insertion point and emits an 'unreachable' into it.
6350 void Return(RValue<Int> ret)
6352 Nucleus::createRet(ret.value);
6353 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6354 Nucleus::createUnreachable();
// Emits a conditional branch on cmp between bodyBB and endBB, then makes bodyBB the insertion point.
// NOTE(review): the return statement is not visible in this excerpt.
6357 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6359 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6360 Nucleus::setInsertBlock(bodyBB);
// Returns the result of the LLVM readcyclecounter intrinsic as a Long.
6365 RValue<Long> Ticks()
6367 llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter);
6369 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
// CVTSS2SI: calls Intrinsic::x86_sse_cvtss2si on a Float4 named `vector` and returns the Int result.
// NOTE(review): the declaration of `vector` is not visible in this excerpt — presumably a Float4 holding val in lane 0; confirm.
6377 RValue<Int> cvtss2si(RValue<Float> val)
6379 llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si);
6384 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
// CVTPS2PI: calls Intrinsic::x86_sse_cvtps2pi on val and returns the result as Int2.
6387 RValue<Int2> cvtps2pi(RValue<Float4> val)
6389 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi);
6391 return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
// CVTTPS2PI: calls Intrinsic::x86_sse_cvttps2pi on val and returns the result as Int2.
6394 RValue<Int2> cvttps2pi(RValue<Float4> val)
6396 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi);
6398 return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
// CVTPS2DQ: converts a Float4 to an Int4.
// With SSE2 this calls Intrinsic::x86_sse2_cvtps2dq directly. The remaining statements
// convert val and Swizzle(val, 0xEE) separately with cvtps2pi and combine the two Int2 results.
6401 RValue<Int4> cvtps2dq(RValue<Float4> val)
6403 if(CPUID::supportsSSE2())
6405 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq);
6407 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
6411 Int2 lo = x86::cvtps2pi(val);
6412 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
6414 return Int4(lo, hi);
// RCPSS: places val in lane 0 of an undefined Float4, calls Intrinsic::x86_sse_rcp_ss and returns lane 0 of the result.
6418 RValue<Float> rcpss(RValue<Float> val)
6420 llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss);
6422 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6424 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
// SQRTSS: places val in lane 0 of an undefined Float4, calls Intrinsic::x86_sse_sqrt_ss and returns lane 0 of the result.
6427 RValue<Float> sqrtss(RValue<Float> val)
6429 llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss);
6431 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6433 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
// RSQRTSS: places val in lane 0 of an undefined Float4, calls Intrinsic::x86_sse_rsqrt_ss and returns lane 0 of the result.
6436 RValue<Float> rsqrtss(RValue<Float> val)
6438 llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss);
6440 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6442 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
// RCPPS: calls Intrinsic::x86_sse_rcp_ps on val.
6445 RValue<Float4> rcpps(RValue<Float4> val)
6447 llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps);
6449 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
// SQRTPS: calls Intrinsic::x86_sse_sqrt_ps on val.
6452 RValue<Float4> sqrtps(RValue<Float4> val)
6454 llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps);
6456 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
// RSQRTPS: calls Intrinsic::x86_sse_rsqrt_ps on val.
6459 RValue<Float4> rsqrtps(RValue<Float4> val)
6461 llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps);
6463 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
// MAXPS: calls Intrinsic::x86_sse_max_ps on x and y.
6466 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
6468 llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps);
6470 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
// MINPS: calls Intrinsic::x86_sse_min_ps on x and y.
6473 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
6475 llvm::Function *minps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
6477 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
// ROUNDSS: rounds the scalar val with rounding-control immediate imm.
// val is placed in lane 0 of an undefined Float4; Intrinsic::x86_sse41_round_ss is called with
// (undef, vector, imm) and lane 0 of the result is returned.
6480 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
6482 llvm::Function *roundss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);
6484 Value *undef = V(UndefValue::get(Float4::getType()));
6485 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
6487 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
// Scalar floor: roundss with rounding-control immediate 1.
6490 RValue<Float> floorss(RValue<Float> val)
6492 return roundss(val, 1);
// Scalar ceiling: roundss with rounding-control immediate 2.
6495 RValue<Float> ceilss(RValue<Float> val)
6497 return roundss(val, 2);
// ROUNDPS: calls Intrinsic::x86_sse41_round_ps on val with rounding-control immediate imm.
6500 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
6502 llvm::Function *roundps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);
6504 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
// Packed floor: roundps with rounding-control immediate 1.
6507 RValue<Float4> floorps(RValue<Float4> val)
6509 return roundps(val, 1);
// Packed ceiling: roundps with rounding-control immediate 2.
6512 RValue<Float4> ceilps(RValue<Float4> val)
6514 return roundps(val, 2);
// CMPPS: calls Intrinsic::x86_sse_cmp_ps on x and y with comparison-predicate immediate imm (passed as a byte constant).
6517 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6519 llvm::Function *cmpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);
6521 return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
// Packed compare, predicate 0 (equal).
6524 RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
6526 return cmpps(x, y, 0);
// Packed compare, predicate 1 (less than).
6529 RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
6531 return cmpps(x, y, 1);
// Packed compare, predicate 2 (less or equal).
6534 RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
6536 return cmpps(x, y, 2);
// Packed compare, predicate 3 (unordered).
6539 RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
6541 return cmpps(x, y, 3);
// Packed compare, predicate 4 (not equal).
6544 RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
6546 return cmpps(x, y, 4);
// Packed compare, predicate 5 (not less than).
6549 RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
6551 return cmpps(x, y, 5);
// Packed compare, predicate 6 (not less or equal).
6554 RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
6556 return cmpps(x, y, 6);
// Packed compare, predicate 7 (ordered).
6559 RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
6561 return cmpps(x, y, 7);
// CMPSS: scalar compare with predicate immediate imm.
// x and y are each placed in lane 0 of an undefined Float4; Intrinsic::x86_sse_cmp_ss is called
// and lane 0 of the result is returned as a Float.
6564 RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
6566 llvm::Function *cmpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);
6568 Value *vector1 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
6569 Value *vector2 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);
6571 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), Float::getType(), 0));
// Scalar compare, predicate 0 (equal).
6574 RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
6576 return cmpss(x, y, 0);
// Scalar compare, predicate 1 (less than).
6579 RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
6581 return cmpss(x, y, 1);
// Scalar compare, predicate 2 (less or equal).
6584 RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
6586 return cmpss(x, y, 2);
// Scalar compare, predicate 3 (unordered).
6589 RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
6591 return cmpss(x, y, 3);
// Scalar compare, predicate 4 (not equal).
6594 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
6596 return cmpss(x, y, 4);
// Scalar compare, predicate 5 (not less than).
6599 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
6601 return cmpss(x, y, 5);
// Scalar compare, predicate 6 (not less or equal).
6604 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
6606 return cmpss(x, y, 6);
// Scalar compare, predicate 7 (ordered).
6609 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
6611 return cmpss(x, y, 7);
// PABSD: calls Intrinsic::x86_ssse3_pabs_d_128 on x.
6614 RValue<Int4> pabsd(RValue<Int4> x)
6616 llvm::Function *pabsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
6618 return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
// PADDSW (MMX): calls Intrinsic::x86_mmx_padds_w on x and y viewed as MMX; result reinterpreted as Short4.
6621 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
6623 llvm::Function *paddsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
6625 return As<Short4>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBSW (MMX): calls Intrinsic::x86_mmx_psubs_w on x and y viewed as MMX; result reinterpreted as Short4.
6628 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
6630 llvm::Function *psubsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
6632 return As<Short4>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
// PADDUSW (MMX): calls Intrinsic::x86_mmx_paddus_w on x and y viewed as MMX; result reinterpreted as UShort4.
6635 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
6637 llvm::Function *paddusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
6639 return As<UShort4>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBUSW (MMX): calls Intrinsic::x86_mmx_psubus_w on x and y viewed as MMX; result reinterpreted as UShort4.
6642 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
6644 llvm::Function *psubusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
6646 return As<UShort4>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
// PADDSB (MMX): calls Intrinsic::x86_mmx_padds_b on x and y viewed as MMX; result reinterpreted as SByte8.
6649 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
6651 llvm::Function *paddsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
6653 return As<SByte8>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBSB (MMX): calls Intrinsic::x86_mmx_psubs_b on x and y viewed as MMX; result reinterpreted as SByte8.
6656 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
6658 llvm::Function *psubsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
6660 return As<SByte8>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
// PADDUSB (MMX): calls Intrinsic::x86_mmx_paddus_b on x and y viewed as MMX; result reinterpreted as Byte8.
6663 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
6665 llvm::Function *paddusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
6667 return As<Byte8>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBUSB (MMX): calls Intrinsic::x86_mmx_psubus_b on x and y viewed as MMX; result reinterpreted as Byte8.
6670 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
6672 llvm::Function *psubusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
6674 return As<Byte8>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
// PADDW (MMX): calls Intrinsic::x86_mmx_padd_w on x and y viewed as MMX; result reinterpreted as Short4.
6677 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
6679 llvm::Function *paddw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
6681 return As<Short4>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBW (MMX): calls Intrinsic::x86_mmx_psub_w on x and y viewed as MMX; result reinterpreted as Short4.
6684 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
6686 llvm::Function *psubw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
6688 return As<Short4>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value)));
// PMULLW (MMX): calls Intrinsic::x86_mmx_pmull_w on x and y viewed as MMX; result reinterpreted as Short4.
6691 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
6693 llvm::Function *pmullw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
6695 return As<Short4>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
// PAND (MMX): calls Intrinsic::x86_mmx_pand on x and y viewed as MMX; result reinterpreted as Short4.
6698 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
6700 llvm::Function *pand = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
6702 return As<Short4>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value)));
// POR (MMX): calls Intrinsic::x86_mmx_por on x and y viewed as MMX; result reinterpreted as Short4.
6705 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
6707 llvm::Function *por = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
6709 return As<Short4>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value)));
// PXOR (MMX): calls Intrinsic::x86_mmx_pxor on x and y viewed as MMX; result reinterpreted as Short4.
6712 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
6714 llvm::Function *pxor = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
6716 return As<Short4>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value)));
// PSHUFW: calls Intrinsic::x86_sse_pshuf_w on x (viewed as MMX) with the byte immediate y; result reinterpreted as Short4.
6719 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
6721 llvm::Function *pshufw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
6723 return As<Short4>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y)))));
// PUNPCKLWD (MMX): calls Intrinsic::x86_mmx_punpcklwd on x and y viewed as MMX; result reinterpreted as Int2.
6726 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
6728 llvm::Function *punpcklwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
6730 return As<Int2>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
// PUNPCKHWD (MMX): calls Intrinsic::x86_mmx_punpckhwd on x and y viewed as MMX; result reinterpreted as Int2.
6733 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
6735 llvm::Function *punpckhwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
6737 return As<Int2>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
// PINSRW (MMX): calls Intrinsic::x86_mmx_pinsr_w with x (viewed as MMX), the Int y and the constant index i; result reinterpreted as Short4.
6740 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
6742 llvm::Function *pinsrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
6744 return As<Short4>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i)))));
// PEXTRW (MMX): calls Intrinsic::x86_mmx_pextr_w with x (viewed as MMX) and the constant index i; returns the Int result.
6747 RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
6749 llvm::Function *pextrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
6751 return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
// PUNPCKLDQ (MMX): calls Intrinsic::x86_mmx_punpckldq on x and y viewed as MMX; result reinterpreted as Short4.
6754 RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y)
6756 llvm::Function *punpckldq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
6758 return As<Short4>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
// PUNPCKHDQ (MMX): calls Intrinsic::x86_mmx_punpckhdq on x and y viewed as MMX; result reinterpreted as Short4.
6761 RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y)
6763 llvm::Function *punpckhdq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
6765 return As<Short4>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
// PUNPCKLBW (MMX): calls Intrinsic::x86_mmx_punpcklbw on x and y viewed as MMX; result reinterpreted as Short4.
6768 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
6770 llvm::Function *punpcklbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
6772 return As<Short4>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
// PUNPCKHBW (MMX): calls Intrinsic::x86_mmx_punpckhbw on x and y viewed as MMX; result reinterpreted as Short4.
6775 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
6777 llvm::Function *punpckhbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
6779 return As<Short4>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
// PADDB (MMX): calls Intrinsic::x86_mmx_padd_b on x and y viewed as MMX; result reinterpreted as Byte8.
6782 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
6784 llvm::Function *paddb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
6786 return As<Byte8>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBB (MMX): calls Intrinsic::x86_mmx_psub_b on x and y viewed as MMX; result reinterpreted as Byte8.
6789 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
6791 llvm::Function *psubb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
6793 return As<Byte8>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value)));
// PADDD (MMX): calls Intrinsic::x86_mmx_padd_d on x and y viewed as MMX; result reinterpreted as Int2.
6796 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
6798 llvm::Function *paddd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
6800 return As<Int2>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value)));
// PSUBD (MMX): calls Intrinsic::x86_mmx_psub_d on x and y viewed as MMX; result reinterpreted as Int2.
6803 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
6805 llvm::Function *psubd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
6807 return As<Int2>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value)));
// PAVGW (MMX): calls Intrinsic::x86_mmx_pavg_w on x and y viewed as MMX; result reinterpreted as UShort4.
6810 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
6812 llvm::Function *pavgw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
6814 return As<UShort4>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
// PMAXSW (MMX): calls Intrinsic::x86_mmx_pmaxs_w on x and y viewed as MMX; result reinterpreted as Short4.
6817 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
6819 llvm::Function *pmaxsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
6821 return As<Short4>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
// PMINSW (MMX): calls Intrinsic::x86_mmx_pmins_w on x and y viewed as MMX; result reinterpreted as Short4.
6824 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
6826 llvm::Function *pminsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
6828 return As<Short4>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value)));
// PCMPGTW (MMX): calls Intrinsic::x86_mmx_pcmpgt_w on x and y viewed as MMX; result reinterpreted as Short4.
6831 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
6833 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
6835 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
// PCMPEQW (MMX): calls Intrinsic::x86_mmx_pcmpeq_w on x and y viewed as MMX; result reinterpreted as Short4.
6838 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
6840 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
6842 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
// PCMPGTB (MMX): calls Intrinsic::x86_mmx_pcmpgt_b on x and y viewed as MMX; result reinterpreted as Byte8.
6845 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
6847 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
6849 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
// PCMPEQB (MMX): calls Intrinsic::x86_mmx_pcmpeq_b on x and y viewed as MMX; result reinterpreted as Byte8.
6852 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
6854 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
6856 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
// PACKSSDW (MMX): calls Intrinsic::x86_mmx_packssdw on x and y viewed as MMX; result reinterpreted as Short4.
6859 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
6861 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
6863 return As<Short4>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
// PACKSSDW (128-bit): packs two Int4 vectors into a Short8.
// With SSE2 this calls Intrinsic::x86_sse2_packssdw_128 directly. The remaining statements pack
// Int2 halves of x and of y with the MMX packssdw and combine the two Short4 results into a Short8.
// NOTE(review): the declarations of `loX` and `loY` are not visible in this excerpt — presumably Int2(x) and Int2(y); confirm.
6866 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
6868 if(CPUID::supportsSSE2())
6870 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
6872 return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
6877 Int2 hiX = Int2(Swizzle(x, 0xEE));
6880 Int2 hiY = Int2(Swizzle(y, 0xEE));
6882 Short4 lo = x86::packssdw(loX, hiX);
6883 Short4 hi = x86::packssdw(loY, hiY);
6885 return Short8(lo, hi);
// PACKSSWB (MMX): calls Intrinsic::x86_mmx_packsswb on x and y viewed as MMX; result reinterpreted as SByte8.
6889 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
6891 llvm::Function *packsswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
6893 return As<SByte8>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
// PACKUSWB (MMX): calls Intrinsic::x86_mmx_packuswb on x and y viewed as MMX; result reinterpreted as Byte8.
6896 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
6898 llvm::Function *packuswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
6900 return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
// PACKUSDW: packs two Int4 vectors into a UShort8.
// With SSE4.1 this calls Intrinsic::x86_sse41_packusdw directly. The remaining statements
// emulate it with packssdw: `v & ~(v >> 31)` masks each input with the complement of its
// sign-shifted copy, 0x8000 is subtracted before the signed pack, and 0x8000 is added back afterwards.
6903 RValue<UShort8> packusdw(RValue<Int4> x, RValue<Int4> y)
6905 if(CPUID::supportsSSE4_1())
6907 llvm::Function *packusdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
6909 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
6913 RValue<Int4> bx = (x & ~(x >> 31)) - Int4(0x8000);
6914 RValue<Int4> by = (y & ~(y >> 31)) - Int4(0x8000);
6916 return As<UShort8>(packssdw(bx, by) + Short8(0x8000u));
// PSRLW (MMX): calls Intrinsic::x86_mmx_psrli_w on x (viewed as MMX) with the constant shift count y; result reinterpreted as UShort4.
6920 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
6922 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
6924 return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSRLW (128-bit): calls Intrinsic::x86_sse2_psrli_w on x with the constant shift count y.
6927 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
6929 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
6931 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
// PSRAW (MMX): calls Intrinsic::x86_mmx_psrai_w on x (viewed as MMX) with the constant shift count y; result reinterpreted as Short4.
6934 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
6936 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
6938 return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSRAW (128-bit): calls Intrinsic::x86_sse2_psrai_w on x with the constant shift count y.
6941 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
6943 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
6945 return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
// PSLLW (MMX): calls Intrinsic::x86_mmx_pslli_w on x (viewed as MMX) with the constant shift count y; result reinterpreted as Short4.
6948 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
6950 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
6952 return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSLLW (128-bit): calls Intrinsic::x86_sse2_pslli_w on x with the constant shift count y.
6955 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
6957 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
6959 return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
// PSLLD (MMX): calls Intrinsic::x86_mmx_pslli_d on x (viewed as MMX) with the constant shift count y; result reinterpreted as Int2.
6962 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
6964 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d);
6966 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSLLD (128-bit): shifts each lane of an Int4 left by the constant count y.
// With SSE2 this calls Intrinsic::x86_sse2_pslli_d directly. The remaining statements shift
// two Int2 halves separately with the MMX pslld and recombine them.
// NOTE(review): the declaration of `lo` is not visible in this excerpt — presumably Int2(x); confirm.
6969 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
6971 if(CPUID::supportsSSE2())
6973 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d);
6975 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
6980 Int2 hi = Int2(Swizzle(x, 0xEE));
6982 lo = x86::pslld(lo, y);
6983 hi = x86::pslld(hi, y);
6985 return Int4(lo, hi);
// PSRAD (MMX): calls Intrinsic::x86_mmx_psrai_d on x (viewed as MMX) with the constant shift count y; result reinterpreted as Int2.
6989 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
6991 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d);
6993 return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSRAD (128-bit): shifts each lane of an Int4 right by the constant count y.
// With SSE2 this calls Intrinsic::x86_sse2_psrai_d directly. The remaining statements shift
// two Int2 halves separately with the MMX psrad and recombine them.
// NOTE(review): the declaration of `lo` is not visible in this excerpt — presumably Int2(x); confirm.
6996 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
6998 if(CPUID::supportsSSE2())
7000 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d);
7002 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
7007 Int2 hi = Int2(Swizzle(x, 0xEE));
7009 lo = x86::psrad(lo, y);
7010 hi = x86::psrad(hi, y);
7012 return Int4(lo, hi);
// PSRLD (MMX): calls Intrinsic::x86_mmx_psrli_d on x (viewed as MMX) with the constant shift count y; result reinterpreted as UInt2.
7016 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7018 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d);
7020 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// PSRLD (128-bit): shifts each lane of a UInt4 right by the constant count y.
// With SSE2 this calls Intrinsic::x86_sse2_psrli_d directly. The remaining statements take
// two UInt2 halves (via the Int4 view of x and Swizzle 0xEE), shift each with the MMX psrld
// and recombine them.
7023 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7025 if(CPUID::supportsSSE2())
7027 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d);
7029 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
7033 UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7034 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7036 lo = x86::psrld(lo, y);
7037 hi = x86::psrld(hi, y);
7039 return UInt4(lo, hi);
// PMAXSD: calls Intrinsic::x86_sse41_pmaxsd on x and y.
7043 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7045 llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd);
7047 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
// PMINSD: calls Intrinsic::x86_sse41_pminsd on x and y.
7050 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7052 llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd);
7054 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
// PMAXUD: calls Intrinsic::x86_sse41_pmaxud on x and y.
7057 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7059 llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud);
7061 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
// PMINUD: calls Intrinsic::x86_sse41_pminud on x and y.
7064 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7066 llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud);
7068 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
// PMULHW (MMX): calls Intrinsic::x86_mmx_pmulh_w on x and y viewed as MMX; result reinterpreted as Short4.
7071 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7073 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w);
7075 return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
// PMULHUW (MMX): calls Intrinsic::x86_mmx_pmulhu_w on x and y viewed as MMX; result reinterpreted as UShort4.
7078 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7080 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w);
7082 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
// PMADDWD (MMX): calls Intrinsic::x86_mmx_pmadd_wd on x and y viewed as MMX; result reinterpreted as Int2.
7085 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7087 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd);
7089 return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
// PMULHW (128-bit): calls Intrinsic::x86_sse2_pmulh_w on x and y.
7092 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7094 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w);
7096 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
// PMULHUW (128-bit): calls Intrinsic::x86_sse2_pmulhu_w on x and y.
7099 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7101 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w);
7103 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
// PMADDWD (128-bit): calls Intrinsic::x86_sse2_pmadd_wd on x and y, returning an Int4.
7106 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7108 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd);
7110 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
7113 RValue<Int> movmskps(RValue<Float4> x)
7115 llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps);
7117 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
7120 RValue<Int> pmovmskb(RValue<Byte8> x)
7122 llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb);
7124 return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value)));
7127 //RValue<Int2> movd(RValue<Pointer<Int>> x)
7129 // Value *element = Nucleus::createLoad(x.value);
7131 //// Value *int2 = UndefValue::get(Int2::getType());
7132 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7134 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7136 // return RValue<Int2>(int2);
7139 //RValue<Int2> movdq2q(RValue<Int4> x)
7141 // Value *long2 = Nucleus::createBitCast(x.value, T(VectorType::get(Long::getType(), 2)));
7142 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7144 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7147 RValue<Int4> pmovzxbd(RValue<Int4> x)
7149 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd);
7151 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))));
7154 RValue<Int4> pmovsxbd(RValue<Int4> x)
7156 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd);
7158 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))));
7161 RValue<Int4> pmovzxwd(RValue<Int4> x)
7163 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd);
7165 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))));
7168 RValue<Int4> pmovsxwd(RValue<Int4> x)
7170 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd);
7172 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))));
// Looks up the x86_mmx_emms intrinsic declaration in ::module.
// NOTE(review): the enclosing function's signature is not visible in this chunk — presumably emms(); confirm.
7177 llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms);
// Emits a call to the intrinsic with no arguments via ::builder; the V()-wrapped result is not used.
7179 V(::builder->CreateCall(emms));