1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Nucleus.hpp"
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
32 #include "LLVMRoutine.hpp"
33 #include "LLVMRoutineManager.hpp"
38 #include "MutexLock.hpp"
40 #include <xmmintrin.h>
43 #if defined(__x86_64__) && defined(_WIN32)
44 extern "C" void X86CompilationCallback()
46 assert(false); // UNIMPLEMENTED
52 bool (*CodeAnalystInitialize)() = 0;
53 void (*CodeAnalystCompleteJITLog)() = 0;
54 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;
59 extern bool JITEmitDebugInfo;
64 sw::LLVMRoutineManager *routineManager = nullptr;
65 llvm::ExecutionEngine *executionEngine = nullptr;
66 llvm::IRBuilder<> *builder = nullptr;
67 llvm::LLVMContext *context = nullptr;
68 llvm::Module *module = nullptr;
69 llvm::Function *function = nullptr;
71 sw::BackoffLock codegenMutex;
78 Optimization optimization[10] = {InstructionCombining, Disabled};
80 class Type : public llvm::Type {};
81 class Value : public llvm::Value {};
82 class SwitchCases : public llvm::SwitchInst {};
83 class BasicBlock : public llvm::BasicBlock {};
85 inline Type *T(llvm::Type *t)
87 return reinterpret_cast<Type*>(t);
90 inline Value *V(llvm::Value *t)
92 return reinterpret_cast<Value*>(t);
95 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
97 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
100 inline BasicBlock *B(llvm::BasicBlock *t)
102 return reinterpret_cast<BasicBlock*>(t);
107 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
109 InitializeNativeTarget();
110 JITEmitDebugInfo = false;
114 ::context = new LLVMContext();
117 ::module = new Module("", *::context);
118 ::routineManager = new LLVMRoutineManager();
120 #if defined(__x86_64__)
121 const char *architecture = "x86-64";
123 const char *architecture = "x86";
126 SmallVector<std::string, 1> MAttrs;
127 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
128 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
129 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
130 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
131 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
132 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
133 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
136 TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
137 ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine);
141 ::builder = new IRBuilder<>(*::context);
144 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
147 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
148 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
149 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
151 CodeAnalystInitialize();
159 delete ::executionEngine;
160 ::executionEngine = nullptr;
162 ::routineManager = nullptr;
163 ::function = nullptr;
166 ::codegenMutex.unlock();
169 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
171 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
173 llvm::Type *type = ::function->getReturnType();
181 createRet(V(UndefValue::get(type)));
188 raw_fd_ostream file("llvm-dump-unopt.txt", error);
189 ::module->print(file, 0);
200 raw_fd_ostream file("llvm-dump-opt.txt", error);
201 ::module->print(file, 0);
204 void *entry = ::executionEngine->getPointerToFunction(::function);
205 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
207 if(CodeAnalystLogJITCode)
209 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
215 void Nucleus::optimize()
217 static PassManager *passManager = nullptr;
221 passManager = new PassManager();
224 // NoInfsFPMath = true;
225 // NoNaNsFPMath = true;
227 passManager->add(new TargetData(*::executionEngine->getTargetData()));
228 passManager->add(createScalarReplAggregatesPass());
230 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
232 switch(optimization[pass])
234 case Disabled: break;
235 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
236 case LICM: passManager->add(createLICMPass()); break;
237 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
238 case GVN: passManager->add(createGVNPass()); break;
239 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
240 case Reassociate: passManager->add(createReassociatePass()); break;
241 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
242 case SCCP: passManager->add(createSCCPPass()); break;
243 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
250 passManager->run(*::module);
253 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
255 // Need to allocate it in the entry block for mem2reg to work
256 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
258 Instruction *declaration;
262 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
266 declaration = new AllocaInst(type, (Value*)0);
269 entryBlock.getInstList().push_front(declaration);
271 return V(declaration);
274 BasicBlock *Nucleus::createBasicBlock()
276 return B(BasicBlock::Create(*::context, "", ::function));
279 BasicBlock *Nucleus::getInsertBlock()
281 return B(::builder->GetInsertBlock());
284 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
286 // assert(::builder->GetInsertBlock()->back().isTerminator());
287 return ::builder->SetInsertPoint(basicBlock);
290 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
292 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false);
293 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
294 ::function->setCallingConv(llvm::CallingConv::C);
296 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
299 Value *Nucleus::getArgument(unsigned int index)
301 llvm::Function::arg_iterator args = ::function->arg_begin();
312 void Nucleus::createRetVoid()
316 ::builder->CreateRetVoid();
319 void Nucleus::createRet(Value *v)
323 ::builder->CreateRet(v);
326 void Nucleus::createBr(BasicBlock *dest)
328 ::builder->CreateBr(dest);
331 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
333 ::builder->CreateCondBr(cond, ifTrue, ifFalse);
336 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
338 return V(::builder->CreateAdd(lhs, rhs));
341 Value *Nucleus::createSub(Value *lhs, Value *rhs)
343 return V(::builder->CreateSub(lhs, rhs));
346 Value *Nucleus::createMul(Value *lhs, Value *rhs)
348 return V(::builder->CreateMul(lhs, rhs));
351 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
353 return V(::builder->CreateUDiv(lhs, rhs));
356 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
358 return V(::builder->CreateSDiv(lhs, rhs));
361 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
363 return V(::builder->CreateFAdd(lhs, rhs));
366 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
368 return V(::builder->CreateFSub(lhs, rhs));
371 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
373 return V(::builder->CreateFMul(lhs, rhs));
376 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
378 return V(::builder->CreateFDiv(lhs, rhs));
381 Value *Nucleus::createURem(Value *lhs, Value *rhs)
383 return V(::builder->CreateURem(lhs, rhs));
386 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
388 return V(::builder->CreateSRem(lhs, rhs));
391 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
393 return V(::builder->CreateFRem(lhs, rhs));
396 Value *Nucleus::createShl(Value *lhs, Value *rhs)
398 return V(::builder->CreateShl(lhs, rhs));
401 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
403 return V(::builder->CreateLShr(lhs, rhs));
406 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
408 return V(::builder->CreateAShr(lhs, rhs));
411 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
413 return V(::builder->CreateAnd(lhs, rhs));
416 Value *Nucleus::createOr(Value *lhs, Value *rhs)
418 return V(::builder->CreateOr(lhs, rhs));
421 Value *Nucleus::createXor(Value *lhs, Value *rhs)
423 return V(::builder->CreateXor(lhs, rhs));
426 Value *Nucleus::createNeg(Value *v)
428 return V(::builder->CreateNeg(v));
431 Value *Nucleus::createFNeg(Value *v)
433 return V(::builder->CreateFNeg(v));
436 Value *Nucleus::createNot(Value *v)
438 return V(::builder->CreateNot(v));
441 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
443 assert(ptr->getType()->getContainedType(0) == type);
444 return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align)));
447 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
449 assert(ptr->getType()->getContainedType(0) == type);
450 ::builder->Insert(new StoreInst(value, ptr, isVolatile, align));
454 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
456 assert(ptr->getType()->getContainedType(0) == type);
457 return V(::builder->CreateGEP(ptr, index));
460 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
462 return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent));
465 Value *Nucleus::createTrunc(Value *v, Type *destType)
467 return V(::builder->CreateTrunc(v, destType));
470 Value *Nucleus::createZExt(Value *v, Type *destType)
472 return V(::builder->CreateZExt(v, destType));
475 Value *Nucleus::createSExt(Value *v, Type *destType)
477 return V(::builder->CreateSExt(v, destType));
480 Value *Nucleus::createFPToSI(Value *v, Type *destType)
482 return V(::builder->CreateFPToSI(v, destType));
485 Value *Nucleus::createSIToFP(Value *v, Type *destType)
487 return V(::builder->CreateSIToFP(v, destType));
490 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
492 return V(::builder->CreateFPTrunc(v, destType));
495 Value *Nucleus::createFPExt(Value *v, Type *destType)
497 return V(::builder->CreateFPExt(v, destType));
500 Value *Nucleus::createBitCast(Value *v, Type *destType)
502 return V(::builder->CreateBitCast(v, destType));
505 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
507 return V(::builder->CreateICmpEQ(lhs, rhs));
510 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
512 return V(::builder->CreateICmpNE(lhs, rhs));
515 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
517 return V(::builder->CreateICmpUGT(lhs, rhs));
520 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
522 return V(::builder->CreateICmpUGE(lhs, rhs));
525 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
527 return V(::builder->CreateICmpULT(lhs, rhs));
530 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
532 return V(::builder->CreateICmpULE(lhs, rhs));
535 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
537 return V(::builder->CreateICmpSGT(lhs, rhs));
540 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
542 return V(::builder->CreateICmpSGE(lhs, rhs));
545 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
547 return V(::builder->CreateICmpSLT(lhs, rhs));
550 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
552 return V(::builder->CreateICmpSLE(lhs, rhs));
555 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
557 return V(::builder->CreateFCmpOEQ(lhs, rhs));
560 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
562 return V(::builder->CreateFCmpOGT(lhs, rhs));
565 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
567 return V(::builder->CreateFCmpOGE(lhs, rhs));
570 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
572 return V(::builder->CreateFCmpOLT(lhs, rhs));
575 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
577 return V(::builder->CreateFCmpOLE(lhs, rhs));
580 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
582 return V(::builder->CreateFCmpONE(lhs, rhs));
585 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
587 return V(::builder->CreateFCmpORD(lhs, rhs));
590 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
592 return V(::builder->CreateFCmpUNO(lhs, rhs));
595 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
597 return V(::builder->CreateFCmpUEQ(lhs, rhs));
600 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
602 return V(::builder->CreateFCmpUGT(lhs, rhs));
605 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
607 return V(::builder->CreateFCmpUGE(lhs, rhs));
610 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
612 return V(::builder->CreateFCmpULT(lhs, rhs));
615 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
617 return V(::builder->CreateFCmpULE(lhs, rhs));
620 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
622 return V(::builder->CreateFCmpULE(lhs, rhs));
625 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
627 assert(vector->getType()->getContainedType(0) == type);
628 return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
631 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
633 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
636 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
638 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
639 const int maxSize = 16;
640 llvm::Constant *swizzle[maxSize];
641 assert(size <= maxSize);
643 for(int i = 0; i < size; i++)
645 swizzle[i] = llvm::ConstantInt::get(Type::getInt32Ty(*::context), select[i]);
648 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
650 return V(::builder->CreateShuffleVector(V1, V2, shuffle));
653 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
655 return V(::builder->CreateSelect(C, ifTrue, ifFalse));
658 SwitchCases *Nucleus::createSwitch(Value *control, BasicBlock *defaultBranch, unsigned numCases)
660 return reinterpret_cast<SwitchCases*>(::builder->CreateSwitch(control, defaultBranch, numCases));
663 void Nucleus::addSwitchCase(SwitchCases *switchCases, int label, BasicBlock *branch)
665 switchCases->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), label, true), branch);
668 void Nucleus::createUnreachable()
670 ::builder->CreateUnreachable();
673 static Value *createSwizzle4(Value *val, unsigned char select)
677 (select >> 0) & 0x03,
678 (select >> 2) & 0x03,
679 (select >> 4) & 0x03,
680 (select >> 6) & 0x03,
683 return Nucleus::createShuffleVector(val, val, swizzle);
686 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
688 bool mask[4] = {false, false, false, false};
690 mask[(select >> 0) & 0x03] = true;
691 mask[(select >> 2) & 0x03] = true;
692 mask[(select >> 4) & 0x03] = true;
693 mask[(select >> 6) & 0x03] = true;
703 return Nucleus::createShuffleVector(lhs, rhs, swizzle);
706 Type *Nucleus::getPointerType(Type *ElementType)
708 return T(llvm::PointerType::get(ElementType, 0));
711 Value *Nucleus::createNullValue(Type *Ty)
713 return V(llvm::Constant::getNullValue(Ty));
716 Value *Nucleus::createConstantLong(int64_t i)
718 return V(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true));
721 Value *Nucleus::createConstantInt(int i)
723 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true));
726 Value *Nucleus::createConstantInt(unsigned int i)
728 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false));
731 Value *Nucleus::createConstantBool(bool b)
733 return V(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b));
736 Value *Nucleus::createConstantByte(signed char i)
738 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true));
741 Value *Nucleus::createConstantByte(unsigned char i)
743 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false));
746 Value *Nucleus::createConstantShort(short i)
748 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true));
751 Value *Nucleus::createConstantShort(unsigned short i)
753 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false));
756 Value *Nucleus::createConstantFloat(float x)
758 return V(llvm::ConstantFP::get(Float::getType(), x));
761 Value *Nucleus::createNullPointer(Type *Ty)
763 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)));
766 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
768 assert(llvm::isa<VectorType>(type));
769 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
770 assert(numConstants <= 16);
771 llvm::Constant *constantVector[16];
773 for(int i = 0; i < numConstants; i++)
775 constantVector[i] = llvm::ConstantInt::get(type->getContainedType(0), constants[i]);
778 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
781 Value *Nucleus::createConstantVector(const double *constants, Type *type)
783 assert(llvm::isa<VectorType>(type));
784 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
785 assert(numConstants <= 8);
786 llvm::Constant *constantVector[8];
788 for(int i = 0; i < numConstants; i++)
790 constantVector[i] = llvm::ConstantFP::get(type->getContainedType(0), constants[i]);
793 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
796 Type *Void::getType()
798 return T(llvm::Type::getVoidTy(*::context));
801 class MMX : public LValue<MMX>
804 static Type *getType();
809 return T(llvm::Type::getX86_MMXTy(*::context));
812 Bool::Bool(Argument<Bool> argument)
814 storeValue(argument.value);
823 storeValue(Nucleus::createConstantBool(x));
826 Bool::Bool(RValue<Bool> rhs)
828 storeValue(rhs.value);
831 Bool::Bool(const Bool &rhs)
833 Value *value = rhs.loadValue();
837 Bool::Bool(const Reference<Bool> &rhs)
839 Value *value = rhs.loadValue();
843 RValue<Bool> Bool::operator=(RValue<Bool> rhs)
845 storeValue(rhs.value);
850 RValue<Bool> Bool::operator=(const Bool &rhs)
852 Value *value = rhs.loadValue();
855 return RValue<Bool>(value);
858 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs)
860 Value *value = rhs.loadValue();
863 return RValue<Bool>(value);
866 RValue<Bool> operator!(RValue<Bool> val)
868 return RValue<Bool>(Nucleus::createNot(val.value));
871 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
873 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
876 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
878 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
881 Type *Bool::getType()
883 return T(llvm::Type::getInt1Ty(*::context));
886 Byte::Byte(Argument<Byte> argument)
888 storeValue(argument.value);
891 Byte::Byte(RValue<Int> cast)
893 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
898 Byte::Byte(RValue<UInt> cast)
900 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
905 Byte::Byte(RValue<UShort> cast)
907 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
918 storeValue(Nucleus::createConstantByte((unsigned char)x));
921 Byte::Byte(unsigned char x)
923 storeValue(Nucleus::createConstantByte(x));
926 Byte::Byte(RValue<Byte> rhs)
928 storeValue(rhs.value);
931 Byte::Byte(const Byte &rhs)
933 Value *value = rhs.loadValue();
937 Byte::Byte(const Reference<Byte> &rhs)
939 Value *value = rhs.loadValue();
943 RValue<Byte> Byte::operator=(RValue<Byte> rhs)
945 storeValue(rhs.value);
950 RValue<Byte> Byte::operator=(const Byte &rhs)
952 Value *value = rhs.loadValue();
955 return RValue<Byte>(value);
958 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs)
960 Value *value = rhs.loadValue();
963 return RValue<Byte>(value);
966 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
968 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
971 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
973 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
976 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
978 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
981 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
983 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
986 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
988 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
991 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
993 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
996 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
998 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1001 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1003 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1006 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1008 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1011 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1013 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1016 RValue<Byte> operator+=(Byte &lhs, RValue<Byte> rhs)
1018 return lhs = lhs + rhs;
1021 RValue<Byte> operator-=(Byte &lhs, RValue<Byte> rhs)
1023 return lhs = lhs - rhs;
1026 RValue<Byte> operator*=(Byte &lhs, RValue<Byte> rhs)
1028 return lhs = lhs * rhs;
1031 RValue<Byte> operator/=(Byte &lhs, RValue<Byte> rhs)
1033 return lhs = lhs / rhs;
1036 RValue<Byte> operator%=(Byte &lhs, RValue<Byte> rhs)
1038 return lhs = lhs % rhs;
1041 RValue<Byte> operator&=(Byte &lhs, RValue<Byte> rhs)
1043 return lhs = lhs & rhs;
1046 RValue<Byte> operator|=(Byte &lhs, RValue<Byte> rhs)
1048 return lhs = lhs | rhs;
1051 RValue<Byte> operator^=(Byte &lhs, RValue<Byte> rhs)
1053 return lhs = lhs ^ rhs;
1056 RValue<Byte> operator<<=(Byte &lhs, RValue<Byte> rhs)
1058 return lhs = lhs << rhs;
1061 RValue<Byte> operator>>=(Byte &lhs, RValue<Byte> rhs)
1063 return lhs = lhs >> rhs;
1066 RValue<Byte> operator+(RValue<Byte> val)
1071 RValue<Byte> operator-(RValue<Byte> val)
1073 return RValue<Byte>(Nucleus::createNeg(val.value));
1076 RValue<Byte> operator~(RValue<Byte> val)
1078 return RValue<Byte>(Nucleus::createNot(val.value));
1081 RValue<Byte> operator++(Byte &val, int) // Post-increment
1083 RValue<Byte> res = val;
1085 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1086 val.storeValue(inc);
1091 const Byte &operator++(Byte &val) // Pre-increment
1093 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1094 val.storeValue(inc);
1099 RValue<Byte> operator--(Byte &val, int) // Post-decrement
1101 RValue<Byte> res = val;
1103 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1104 val.storeValue(inc);
1109 const Byte &operator--(Byte &val) // Pre-decrement
1111 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1112 val.storeValue(inc);
1117 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1119 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1122 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1124 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1127 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1129 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1132 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1134 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1137 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1139 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1142 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1144 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1147 Type *Byte::getType()
1149 return T(llvm::Type::getInt8Ty(*::context));
1152 SByte::SByte(Argument<SByte> argument)
1154 storeValue(argument.value);
1157 SByte::SByte(RValue<Int> cast)
1159 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1161 storeValue(integer);
1164 SByte::SByte(RValue<Short> cast)
1166 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1168 storeValue(integer);
1175 SByte::SByte(signed char x)
1177 storeValue(Nucleus::createConstantByte(x));
1180 SByte::SByte(RValue<SByte> rhs)
1182 storeValue(rhs.value);
1185 SByte::SByte(const SByte &rhs)
1187 Value *value = rhs.loadValue();
1191 SByte::SByte(const Reference<SByte> &rhs)
1193 Value *value = rhs.loadValue();
1197 RValue<SByte> SByte::operator=(RValue<SByte> rhs)
1199 storeValue(rhs.value);
1204 RValue<SByte> SByte::operator=(const SByte &rhs)
1206 Value *value = rhs.loadValue();
1209 return RValue<SByte>(value);
1212 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs)
1214 Value *value = rhs.loadValue();
1217 return RValue<SByte>(value);
1220 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1222 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1225 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1227 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1230 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1232 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1235 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1237 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1240 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1242 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1245 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1247 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1250 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1252 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1255 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1257 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1260 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1262 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1265 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1267 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1270 RValue<SByte> operator+=(SByte &lhs, RValue<SByte> rhs)
1272 return lhs = lhs + rhs;
1275 RValue<SByte> operator-=(SByte &lhs, RValue<SByte> rhs)
1277 return lhs = lhs - rhs;
1280 RValue<SByte> operator*=(SByte &lhs, RValue<SByte> rhs)
1282 return lhs = lhs * rhs;
1285 RValue<SByte> operator/=(SByte &lhs, RValue<SByte> rhs)
1287 return lhs = lhs / rhs;
1290 RValue<SByte> operator%=(SByte &lhs, RValue<SByte> rhs)
1292 return lhs = lhs % rhs;
1295 RValue<SByte> operator&=(SByte &lhs, RValue<SByte> rhs)
1297 return lhs = lhs & rhs;
1300 RValue<SByte> operator|=(SByte &lhs, RValue<SByte> rhs)
1302 return lhs = lhs | rhs;
1305 RValue<SByte> operator^=(SByte &lhs, RValue<SByte> rhs)
1307 return lhs = lhs ^ rhs;
1310 RValue<SByte> operator<<=(SByte &lhs, RValue<SByte> rhs)
1312 return lhs = lhs << rhs;
1315 RValue<SByte> operator>>=(SByte &lhs, RValue<SByte> rhs)
1317 return lhs = lhs >> rhs;
1320 RValue<SByte> operator+(RValue<SByte> val)
1325 RValue<SByte> operator-(RValue<SByte> val)
1327 return RValue<SByte>(Nucleus::createNeg(val.value));
1330 RValue<SByte> operator~(RValue<SByte> val)
1332 return RValue<SByte>(Nucleus::createNot(val.value));
1335 RValue<SByte> operator++(SByte &val, int) // Post-increment
1337 RValue<SByte> res = val;
1339 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1340 val.storeValue(inc);
1345 const SByte &operator++(SByte &val) // Pre-increment
1347 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1348 val.storeValue(inc);
1353 RValue<SByte> operator--(SByte &val, int) // Post-decrement
1355 RValue<SByte> res = val;
1357 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1358 val.storeValue(inc);
1363 const SByte &operator--(SByte &val) // Pre-decrement
1365 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1366 val.storeValue(inc);
1371 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1373 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1376 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1378 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1381 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1383 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1386 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1388 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1391 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1393 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1396 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1398 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1401 Type *SByte::getType()
1403 return T(llvm::Type::getInt8Ty(*::context));
1406 Short::Short(Argument<Short> argument)
1408 storeValue(argument.value);
1411 Short::Short(RValue<Int> cast)
1413 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1415 storeValue(integer);
1422 Short::Short(short x)
1424 storeValue(Nucleus::createConstantShort(x));
1427 Short::Short(RValue<Short> rhs)
1429 storeValue(rhs.value);
1432 Short::Short(const Short &rhs)
1434 Value *value = rhs.loadValue();
1438 Short::Short(const Reference<Short> &rhs)
1440 Value *value = rhs.loadValue();
1444 RValue<Short> Short::operator=(RValue<Short> rhs)
1446 storeValue(rhs.value);
1451 RValue<Short> Short::operator=(const Short &rhs)
1453 Value *value = rhs.loadValue();
1456 return RValue<Short>(value);
1459 RValue<Short> Short::operator=(const Reference<Short> &rhs)
1461 Value *value = rhs.loadValue();
1464 return RValue<Short>(value);
1467 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1469 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1472 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1474 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1477 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1479 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1482 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1484 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1487 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1489 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1492 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1494 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1497 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1499 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1502 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1504 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1507 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1509 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1512 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1514 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1517 RValue<Short> operator+=(Short &lhs, RValue<Short> rhs)
1519 return lhs = lhs + rhs;
1522 RValue<Short> operator-=(Short &lhs, RValue<Short> rhs)
1524 return lhs = lhs - rhs;
1527 RValue<Short> operator*=(Short &lhs, RValue<Short> rhs)
1529 return lhs = lhs * rhs;
1532 RValue<Short> operator/=(Short &lhs, RValue<Short> rhs)
1534 return lhs = lhs / rhs;
1537 RValue<Short> operator%=(Short &lhs, RValue<Short> rhs)
1539 return lhs = lhs % rhs;
1542 RValue<Short> operator&=(Short &lhs, RValue<Short> rhs)
1544 return lhs = lhs & rhs;
1547 RValue<Short> operator|=(Short &lhs, RValue<Short> rhs)
1549 return lhs = lhs | rhs;
1552 RValue<Short> operator^=(Short &lhs, RValue<Short> rhs)
1554 return lhs = lhs ^ rhs;
1557 RValue<Short> operator<<=(Short &lhs, RValue<Short> rhs)
1559 return lhs = lhs << rhs;
1562 RValue<Short> operator>>=(Short &lhs, RValue<Short> rhs)
1564 return lhs = lhs >> rhs;
1567 RValue<Short> operator+(RValue<Short> val)
1572 RValue<Short> operator-(RValue<Short> val)
1574 return RValue<Short>(Nucleus::createNeg(val.value));
1577 RValue<Short> operator~(RValue<Short> val)
1579 return RValue<Short>(Nucleus::createNot(val.value));
1582 RValue<Short> operator++(Short &val, int) // Post-increment
1584 RValue<Short> res = val;
1586 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1587 val.storeValue(inc);
1592 const Short &operator++(Short &val) // Pre-increment
1594 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1595 val.storeValue(inc);
1600 RValue<Short> operator--(Short &val, int) // Post-decrement
1602 RValue<Short> res = val;
1604 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1605 val.storeValue(inc);
1610 const Short &operator--(Short &val) // Pre-decrement
1612 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1613 val.storeValue(inc);
1618 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1620 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1623 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1625 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1628 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1630 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1633 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1635 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1638 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1640 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1643 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1645 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1648 Type *Short::getType()
1650 return T(llvm::Type::getInt16Ty(*::context));
1653 UShort::UShort(Argument<UShort> argument)
1655 storeValue(argument.value);
1658 UShort::UShort(RValue<UInt> cast)
1660 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1662 storeValue(integer);
1665 UShort::UShort(RValue<Int> cast)
1667 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1669 storeValue(integer);
1676 UShort::UShort(unsigned short x)
1678 storeValue(Nucleus::createConstantShort(x));
1681 UShort::UShort(RValue<UShort> rhs)
1683 storeValue(rhs.value);
1686 UShort::UShort(const UShort &rhs)
1688 Value *value = rhs.loadValue();
1692 UShort::UShort(const Reference<UShort> &rhs)
1694 Value *value = rhs.loadValue();
1698 RValue<UShort> UShort::operator=(RValue<UShort> rhs)
1700 storeValue(rhs.value);
1705 RValue<UShort> UShort::operator=(const UShort &rhs)
1707 Value *value = rhs.loadValue();
1710 return RValue<UShort>(value);
1713 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs)
1715 Value *value = rhs.loadValue();
1718 return RValue<UShort>(value);
1721 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1723 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1726 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1728 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1731 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1733 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1736 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1738 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1741 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1743 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1746 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1748 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1751 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1753 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1756 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1758 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1761 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1763 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1766 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1768 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1771 RValue<UShort> operator+=(UShort &lhs, RValue<UShort> rhs)
1773 return lhs = lhs + rhs;
1776 RValue<UShort> operator-=(UShort &lhs, RValue<UShort> rhs)
1778 return lhs = lhs - rhs;
1781 RValue<UShort> operator*=(UShort &lhs, RValue<UShort> rhs)
1783 return lhs = lhs * rhs;
1786 RValue<UShort> operator/=(UShort &lhs, RValue<UShort> rhs)
1788 return lhs = lhs / rhs;
1791 RValue<UShort> operator%=(UShort &lhs, RValue<UShort> rhs)
1793 return lhs = lhs % rhs;
1796 RValue<UShort> operator&=(UShort &lhs, RValue<UShort> rhs)
1798 return lhs = lhs & rhs;
1801 RValue<UShort> operator|=(UShort &lhs, RValue<UShort> rhs)
1803 return lhs = lhs | rhs;
1806 RValue<UShort> operator^=(UShort &lhs, RValue<UShort> rhs)
1808 return lhs = lhs ^ rhs;
1811 RValue<UShort> operator<<=(UShort &lhs, RValue<UShort> rhs)
1813 return lhs = lhs << rhs;
1816 RValue<UShort> operator>>=(UShort &lhs, RValue<UShort> rhs)
1818 return lhs = lhs >> rhs;
1821 RValue<UShort> operator+(RValue<UShort> val)
1826 RValue<UShort> operator-(RValue<UShort> val)
1828 return RValue<UShort>(Nucleus::createNeg(val.value));
1831 RValue<UShort> operator~(RValue<UShort> val)
1833 return RValue<UShort>(Nucleus::createNot(val.value));
1836 RValue<UShort> operator++(UShort &val, int) // Post-increment
1838 RValue<UShort> res = val;
1840 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1841 val.storeValue(inc);
1846 const UShort &operator++(UShort &val) // Pre-increment
1848 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1849 val.storeValue(inc);
1854 RValue<UShort> operator--(UShort &val, int) // Post-decrement
1856 RValue<UShort> res = val;
1858 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1859 val.storeValue(inc);
1864 const UShort &operator--(UShort &val) // Pre-decrement
1866 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1867 val.storeValue(inc);
1872 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1874 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1877 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1879 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1882 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1884 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1887 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1889 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1892 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1894 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1897 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1899 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1902 Type *UShort::getType()
1904 return T(llvm::Type::getInt16Ty(*::context));
1907 Byte4::Byte4(RValue<Byte8> cast)
1909 // xyzw.parent = this;
1911 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType()));
1914 Byte4::Byte4(const Reference<Byte4> &rhs)
1916 // xyzw.parent = this;
1918 Value *value = rhs.loadValue();
1922 Type *Byte4::getType()
1925 return T(VectorType::get(Byte::getType(), 4));
1927 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1931 Type *SByte4::getType()
1934 return T(VectorType::get(SByte::getType(), 4));
1936 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1942 // xyzw.parent = this;
1945 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
1947 // xyzw.parent = this;
1949 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
1950 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Byte::getType(), 8))));
1952 storeValue(Nucleus::createBitCast(vector, getType()));
1955 Byte8::Byte8(RValue<Byte8> rhs)
1957 // xyzw.parent = this;
1959 storeValue(rhs.value);
1962 Byte8::Byte8(const Byte8 &rhs)
1964 // xyzw.parent = this;
1966 Value *value = rhs.loadValue();
1970 Byte8::Byte8(const Reference<Byte8> &rhs)
1972 // xyzw.parent = this;
1974 Value *value = rhs.loadValue();
1978 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs)
1980 storeValue(rhs.value);
1985 RValue<Byte8> Byte8::operator=(const Byte8 &rhs)
1987 Value *value = rhs.loadValue();
1990 return RValue<Byte8>(value);
1993 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs)
1995 Value *value = rhs.loadValue();
1998 return RValue<Byte8>(value);
2001 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2003 if(CPUID::supportsMMX2())
2005 return x86::paddb(lhs, rhs);
2009 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2013 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2015 if(CPUID::supportsMMX2())
2017 return x86::psubb(lhs, rhs);
2021 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2025 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2027 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2030 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2032 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2035 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2037 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2040 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2042 if(CPUID::supportsMMX2())
2044 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2048 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2052 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2054 if(CPUID::supportsMMX2())
2056 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2060 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2064 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2066 if(CPUID::supportsMMX2())
2068 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2072 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2076 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2078 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2081 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2083 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2086 RValue<Byte8> operator+=(Byte8 &lhs, RValue<Byte8> rhs)
2088 return lhs = lhs + rhs;
2091 RValue<Byte8> operator-=(Byte8 &lhs, RValue<Byte8> rhs)
2093 return lhs = lhs - rhs;
2096 // RValue<Byte8> operator*=(Byte8 &lhs, RValue<Byte8> rhs)
2098 // return lhs = lhs * rhs;
2101 // RValue<Byte8> operator/=(Byte8 &lhs, RValue<Byte8> rhs)
2103 // return lhs = lhs / rhs;
2106 // RValue<Byte8> operator%=(Byte8 &lhs, RValue<Byte8> rhs)
2108 // return lhs = lhs % rhs;
2111 RValue<Byte8> operator&=(Byte8 &lhs, RValue<Byte8> rhs)
2113 return lhs = lhs & rhs;
2116 RValue<Byte8> operator|=(Byte8 &lhs, RValue<Byte8> rhs)
2118 return lhs = lhs | rhs;
2121 RValue<Byte8> operator^=(Byte8 &lhs, RValue<Byte8> rhs)
2123 return lhs = lhs ^ rhs;
2126 // RValue<Byte8> operator<<=(Byte8 &lhs, RValue<Byte8> rhs)
2128 // return lhs = lhs << rhs;
2131 // RValue<Byte8> operator>>=(Byte8 &lhs, RValue<Byte8> rhs)
2133 // return lhs = lhs >> rhs;
2136 // RValue<Byte8> operator+(RValue<Byte8> val)
2141 // RValue<Byte8> operator-(RValue<Byte8> val)
2143 // return RValue<Byte8>(Nucleus::createNeg(val.value));
2146 RValue<Byte8> operator~(RValue<Byte8> val)
2148 if(CPUID::supportsMMX2())
2150 return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2154 return RValue<Byte8>(Nucleus::createNot(val.value));
2158 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2160 return x86::paddusb(x, y);
2163 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2165 return x86::psubusb(x, y);
2168 RValue<Short4> Unpack(RValue<Byte4> x)
2170 Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
2171 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2173 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
2176 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2178 if(CPUID::supportsMMX2())
2180 return x86::punpcklbw(x, y);
2184 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2185 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2187 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2191 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2193 if(CPUID::supportsMMX2())
2195 return x86::punpckhbw(x, y);
2199 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2200 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2202 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2206 RValue<Int> SignMask(RValue<Byte8> x)
2208 return x86::pmovmskb(x);
2211 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2213 // return x86::pcmpgtb(x, y); // FIXME: Signedness
2216 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2218 return x86::pcmpeqb(x, y);
2221 Type *Byte8::getType()
2223 if(CPUID::supportsMMX2())
2225 return MMX::getType();
2229 return T(VectorType::get(Byte::getType(), 8));
2235 // xyzw.parent = this;
2238 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2240 // xyzw.parent = this;
2242 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2243 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(SByte::getType(), 8))));
2245 storeValue(Nucleus::createBitCast(vector, getType()));
2248 SByte8::SByte8(RValue<SByte8> rhs)
2250 // xyzw.parent = this;
2252 storeValue(rhs.value);
2255 SByte8::SByte8(const SByte8 &rhs)
2257 // xyzw.parent = this;
2259 Value *value = rhs.loadValue();
2263 SByte8::SByte8(const Reference<SByte8> &rhs)
2265 // xyzw.parent = this;
2267 Value *value = rhs.loadValue();
2271 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs)
2273 storeValue(rhs.value);
2278 RValue<SByte8> SByte8::operator=(const SByte8 &rhs)
2280 Value *value = rhs.loadValue();
2283 return RValue<SByte8>(value);
2286 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs)
2288 Value *value = rhs.loadValue();
2291 return RValue<SByte8>(value);
2294 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2296 if(CPUID::supportsMMX2())
2298 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2302 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2306 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2308 if(CPUID::supportsMMX2())
2310 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2314 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2318 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2320 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2323 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2325 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2328 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2330 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2333 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2335 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2338 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2340 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2343 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2345 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2348 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2350 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2353 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2355 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2358 RValue<SByte8> operator+=(SByte8 &lhs, RValue<SByte8> rhs)
2360 return lhs = lhs + rhs;
2363 RValue<SByte8> operator-=(SByte8 &lhs, RValue<SByte8> rhs)
2365 return lhs = lhs - rhs;
2368 // RValue<SByte8> operator*=(SByte8 &lhs, RValue<SByte8> rhs)
2370 // return lhs = lhs * rhs;
2373 // RValue<SByte8> operator/=(SByte8 &lhs, RValue<SByte8> rhs)
2375 // return lhs = lhs / rhs;
2378 // RValue<SByte8> operator%=(SByte8 &lhs, RValue<SByte8> rhs)
2380 // return lhs = lhs % rhs;
2383 RValue<SByte8> operator&=(SByte8 &lhs, RValue<SByte8> rhs)
2385 return lhs = lhs & rhs;
2388 RValue<SByte8> operator|=(SByte8 &lhs, RValue<SByte8> rhs)
2390 return lhs = lhs | rhs;
2393 RValue<SByte8> operator^=(SByte8 &lhs, RValue<SByte8> rhs)
2395 return lhs = lhs ^ rhs;
2398 // RValue<SByte8> operator<<=(SByte8 &lhs, RValue<SByte8> rhs)
2400 // return lhs = lhs << rhs;
2403 // RValue<SByte8> operator>>=(SByte8 &lhs, RValue<SByte8> rhs)
2405 // return lhs = lhs >> rhs;
2408 // RValue<SByte8> operator+(RValue<SByte8> val)
2413 // RValue<SByte8> operator-(RValue<SByte8> val)
2415 // return RValue<SByte8>(Nucleus::createNeg(val.value));
2418 RValue<SByte8> operator~(RValue<SByte8> val)
2420 if(CPUID::supportsMMX2())
2422 return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2426 return RValue<SByte8>(Nucleus::createNot(val.value));
2430 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2432 return x86::paddsb(x, y);
2435 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2437 return x86::psubsb(x, y);
2440 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2442 if(CPUID::supportsMMX2())
2444 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2448 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2449 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2451 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2455 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2457 if(CPUID::supportsMMX2())
2459 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2463 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2464 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2466 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2470 RValue<Int> SignMask(RValue<SByte8> x)
2472 return x86::pmovmskb(As<Byte8>(x));
2475 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2477 return x86::pcmpgtb(x, y);
2480 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2482 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2485 Type *SByte8::getType()
2487 if(CPUID::supportsMMX2())
2489 return MMX::getType();
2493 return T(VectorType::get(SByte::getType(), 8));
2497 Byte16::Byte16(RValue<Byte16> rhs)
2499 // xyzw.parent = this;
2501 storeValue(rhs.value);
2504 Byte16::Byte16(const Byte16 &rhs)
2506 // xyzw.parent = this;
2508 Value *value = rhs.loadValue();
2512 Byte16::Byte16(const Reference<Byte16> &rhs)
2514 // xyzw.parent = this;
2516 Value *value = rhs.loadValue();
2520 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs)
2522 storeValue(rhs.value);
2527 RValue<Byte16> Byte16::operator=(const Byte16 &rhs)
2529 Value *value = rhs.loadValue();
2532 return RValue<Byte16>(value);
2535 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs)
2537 Value *value = rhs.loadValue();
2540 return RValue<Byte16>(value);
2543 Type *Byte16::getType()
2545 return T(VectorType::get(Byte::getType(), 16));
2548 Type *SByte16::getType()
2550 return T( VectorType::get(SByte::getType(), 16));
2553 Short2::Short2(RValue<Short4> cast)
2555 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
2558 Type *Short2::getType()
2561 return T(VectorType::get(Short::getType(), 2));
2563 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
2567 UShort2::UShort2(RValue<UShort4> cast)
2569 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
2572 Type *UShort2::getType()
2575 return T(VectorType::get(UShort::getType(), 2));
2577 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
2581 Short4::Short4(RValue<Int> cast)
2583 Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2584 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2586 storeValue(swizzle);
2589 Short4::Short4(RValue<Int4> cast)
2591 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
2593 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
2595 pack[0] = Nucleus::createConstantInt(0);
2596 pack[1] = Nucleus::createConstantInt(2);
2597 pack[2] = Nucleus::createConstantInt(4);
2598 pack[3] = Nucleus::createConstantInt(6);
2600 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
2604 // FIXME: Use Swizzle<Short8>
2605 if(!CPUID::supportsSSSE3())
2607 int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
2608 int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
2610 Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
2611 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
2612 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
2613 packed = createSwizzle4(int4, 0x88);
2617 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
2618 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2619 packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
2622 #if 0 // FIXME: No optimal instruction selection
2623 Value *qword2 = Nucleus::createBitCast(packed, T(VectorType::get(Long::getType(), 2)));
2624 Value *element = Nucleus::createExtractElement(qword2, 0);
2625 Value *short4 = Nucleus::createBitCast(element, Short4::getType());
2626 #else // FIXME: Requires SSE
2627 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2628 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2635 // Short4::Short4(RValue<Float> cast)
2639 Short4::Short4(RValue<Float4> cast)
2641 Int4 v4i32 = Int4(cast);
2642 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2644 storeValue(As<Short4>(Int2(v4i32)).value);
2649 // xyzw.parent = this;
2652 Short4::Short4(short xyzw)
2654 // xyzw.parent = this;
2656 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2657 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2659 storeValue(Nucleus::createBitCast(vector, getType()));
2662 Short4::Short4(short x, short y, short z, short w)
2664 // xyzw.parent = this;
2666 int64_t constantVector[4] = {x, y, z, w};
2667 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2669 storeValue(Nucleus::createBitCast(vector, getType()));
2672 Short4::Short4(RValue<Short4> rhs)
2674 // xyzw.parent = this;
2676 storeValue(rhs.value);
2679 Short4::Short4(const Short4 &rhs)
2681 // xyzw.parent = this;
2683 Value *value = rhs.loadValue();
2687 Short4::Short4(const Reference<Short4> &rhs)
2689 // xyzw.parent = this;
2691 Value *value = rhs.loadValue();
2695 Short4::Short4(RValue<UShort4> rhs)
2697 // xyzw.parent = this;
2699 storeValue(rhs.value);
2702 Short4::Short4(const UShort4 &rhs)
2704 // xyzw.parent = this;
2706 storeValue(rhs.loadValue());
2709 Short4::Short4(const Reference<UShort4> &rhs)
2711 // xyzw.parent = this;
2713 storeValue(rhs.loadValue());
2716 RValue<Short4> Short4::operator=(RValue<Short4> rhs)
2718 storeValue(rhs.value);
2723 RValue<Short4> Short4::operator=(const Short4 &rhs)
2725 Value *value = rhs.loadValue();
2728 return RValue<Short4>(value);
2731 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs)
2733 Value *value = rhs.loadValue();
2736 return RValue<Short4>(value);
2739 RValue<Short4> Short4::operator=(RValue<UShort4> rhs)
2741 storeValue(rhs.value);
2743 return RValue<Short4>(rhs);
2746 RValue<Short4> Short4::operator=(const UShort4 &rhs)
2748 Value *value = rhs.loadValue();
2751 return RValue<Short4>(value);
2754 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs)
2756 Value *value = rhs.loadValue();
2759 return RValue<Short4>(value);
2762 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2764 if(CPUID::supportsMMX2())
2766 return x86::paddw(lhs, rhs);
2770 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2774 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2776 if(CPUID::supportsMMX2())
2778 return x86::psubw(lhs, rhs);
2782 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2786 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2788 if(CPUID::supportsMMX2())
2790 return x86::pmullw(lhs, rhs);
2794 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2798 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2800 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2803 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2805 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2808 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2810 if(CPUID::supportsMMX2())
2812 return x86::pand(lhs, rhs);
2816 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2820 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2822 if(CPUID::supportsMMX2())
2824 return x86::por(lhs, rhs);
2828 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2832 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2834 if(CPUID::supportsMMX2())
2836 return x86::pxor(lhs, rhs);
2840 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2844 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2846 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2848 return x86::psllw(lhs, rhs);
2851 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2853 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2855 return x86::psraw(lhs, rhs);
2858 RValue<Short4> operator+=(Short4 &lhs, RValue<Short4> rhs)
2860 return lhs = lhs + rhs;
2863 RValue<Short4> operator-=(Short4 &lhs, RValue<Short4> rhs)
2865 return lhs = lhs - rhs;
2868 RValue<Short4> operator*=(Short4 &lhs, RValue<Short4> rhs)
2870 return lhs = lhs * rhs;
2873 // RValue<Short4> operator/=(Short4 &lhs, RValue<Short4> rhs)
2875 // return lhs = lhs / rhs;
2878 // RValue<Short4> operator%=(Short4 &lhs, RValue<Short4> rhs)
2880 // return lhs = lhs % rhs;
2883 RValue<Short4> operator&=(Short4 &lhs, RValue<Short4> rhs)
2885 return lhs = lhs & rhs;
2888 RValue<Short4> operator|=(Short4 &lhs, RValue<Short4> rhs)
2890 return lhs = lhs | rhs;
2893 RValue<Short4> operator^=(Short4 &lhs, RValue<Short4> rhs)
2895 return lhs = lhs ^ rhs;
2898 RValue<Short4> operator<<=(Short4 &lhs, unsigned char rhs)
2900 return lhs = lhs << rhs;
2903 RValue<Short4> operator>>=(Short4 &lhs, unsigned char rhs)
2905 return lhs = lhs >> rhs;
2908 // RValue<Short4> operator+(RValue<Short4> val)
2913 RValue<Short4> operator-(RValue<Short4> val)
2915 if(CPUID::supportsMMX2())
2917 return Short4(0, 0, 0, 0) - val;
2921 return RValue<Short4>(Nucleus::createNeg(val.value));
2925 RValue<Short4> operator~(RValue<Short4> val)
2927 if(CPUID::supportsMMX2())
2929 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
2933 return RValue<Short4>(Nucleus::createNot(val.value));
2937 RValue<Short4> RoundShort4(RValue<Float4> cast)
2939 RValue<Int4> v4i32 = x86::cvtps2dq(cast);
2940 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
2942 return As<Short4>(Int2(As<Int4>(v8i16)));
2945 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2947 return x86::pmaxsw(x, y);
2950 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
2952 return x86::pminsw(x, y);
2955 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
2957 return x86::paddsw(x, y);
2960 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
2962 return x86::psubsw(x, y);
2965 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
2967 return x86::pmulhw(x, y);
2970 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
2972 return x86::pmaddwd(x, y);
2975 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
2977 return x86::packsswb(x, y);
2980 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
2982 if(CPUID::supportsMMX2())
2984 return x86::punpcklwd(x, y);
2988 int shuffle[4] = {0, 4, 1, 5};
2989 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2991 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
2995 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
2997 if(CPUID::supportsMMX2())
2999 return x86::punpckhwd(x, y);
3003 int shuffle[4] = {2, 6, 3, 7};
3004 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
3006 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3010 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3012 if(CPUID::supportsMMX2())
3014 return x86::pshufw(x, select);
3018 return RValue<Short4>(createSwizzle4(x.value, select));
3022 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3024 if(CPUID::supportsMMX2())
3026 return x86::pinsrw(val, Int(element), i);
3030 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3034 RValue<Short> Extract(RValue<Short4> val, int i)
3036 if(CPUID::supportsMMX2())
3038 return Short(x86::pextrw(val, i));
3042 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
3046 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3048 return x86::pcmpgtw(x, y);
3051 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3053 return x86::pcmpeqw(x, y);
3056 Type *Short4::getType()
3058 if(CPUID::supportsMMX2())
3060 return MMX::getType();
3064 return T(VectorType::get(Short::getType(), 4));
3068 UShort4::UShort4(RValue<Int4> cast)
3070 *this = Short4(cast);
// Converts a Float4 to UShort4, optionally clamping to the 16-bit unsigned range.
// NOTE(review): several lines of this constructor are not visible in this listing
// (the declarations of `sat` and `int4`, and the branch structure around them);
// the comments below only describe the statements that are shown.
3073 UShort4::UShort4(RValue<Float4> cast, bool saturate)
3079 if(CPUID::supportsSSE4_1())
// With SSE4.1 only the upper bound is clamped here.
3081 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
// Otherwise both bounds are clamped in floating point.
3085 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
// Without saturation or without SSE4.1: go through the Int4 -> Short4 conversion.
3095 if(!saturate || !CPUID::supportsSSE4_1())
3097 *this = Short4(Int4(int4));
// Saturating SSE4.1 path: pack int4 with itself via x86::packusdw and keep the
// low half (through the Int2(Int4) conversion).
3101 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
// Remaining UShort4 constructors.
// NOTE(review): the next line has no visible signature in this listing; it presumably
// belongs to a default constructor - confirm against the full file.
3107 // xyzw.parent = this;
// Broadcast constructor: all four lanes receive the same constant.
3110 UShort4::UShort4(unsigned short xyzw)
3112 // xyzw.parent = this;
3114 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3115 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
// Bitcast because getType() may be the MMX type rather than a 4 x UShort vector.
3117 storeValue(Nucleus::createBitCast(vector, getType()));
// Per-lane constant constructor (lane order x, y, z, w).
3120 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3122 // xyzw.parent = this;
3124 int64_t constantVector[4] = {x, y, z, w};
3125 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
3127 storeValue(Nucleus::createBitCast(vector, getType()));
// Initializes from a UShort4 r-value by storing its value directly.
3130 UShort4::UShort4(RValue<UShort4> rhs)
3132 // xyzw.parent = this;
3134 storeValue(rhs.value);
// Copy constructor: loads the current value of rhs.
// NOTE(review): here and in the constructors below that only show the load, the
// statement storing `value` is not visible in this listing - presumably storeValue(value).
3137 UShort4::UShort4(const UShort4 &rhs)
3139 // xyzw.parent = this;
3141 Value *value = rhs.loadValue();
// Initializes from a reference to a UShort4.
3145 UShort4::UShort4(const Reference<UShort4> &rhs)
3147 // xyzw.parent = this;
3149 Value *value = rhs.loadValue();
// Initializes from a Short4 r-value; the value is stored without any conversion.
3153 UShort4::UShort4(RValue<Short4> rhs)
3155 // xyzw.parent = this;
3157 storeValue(rhs.value);
// Initializes from a Short4 variable.
3160 UShort4::UShort4(const Short4 &rhs)
3162 // xyzw.parent = this;
3164 Value *value = rhs.loadValue();
// Initializes from a reference to a Short4.
3168 UShort4::UShort4(const Reference<Short4> &rhs)
3170 // xyzw.parent = this;
3172 Value *value = rhs.loadValue();
// Assignment overloads for UShort4. Each returns the assigned value as an r-value.
// NOTE(review): short statements (the `return rhs;` of the first overload and the
// storeValue(value) calls after each load) are not visible in this listing.
3176 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs)
3178 storeValue(rhs.value);
// Assign from another UShort4 variable.
3183 RValue<UShort4> UShort4::operator=(const UShort4 &rhs)
3185 Value *value = rhs.loadValue();
3188 return RValue<UShort4>(value);
// Assign from a reference to a UShort4.
3191 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs)
3193 Value *value = rhs.loadValue();
3196 return RValue<UShort4>(value);
// Assign from a Short4 r-value; the value is stored as-is and re-wrapped as UShort4.
3199 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs)
3201 storeValue(rhs.value);
3203 return RValue<UShort4>(rhs);
// Assign from a Short4 variable.
3206 RValue<UShort4> UShort4::operator=(const Short4 &rhs)
3208 Value *value = rhs.loadValue();
3211 return RValue<UShort4>(value);
// Assign from a reference to a Short4.
3214 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs)
3216 Value *value = rhs.loadValue();
3219 return RValue<UShort4>(value);
// Lane-wise binary operators on UShort4. Each one has two code paths:
// with MMX2 the operands are reinterpreted as Short4 and passed to an x86 helper;
// otherwise the matching generic Nucleus instruction is emitted on the raw values.
// Addition.
3222 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3224 if(CPUID::supportsMMX2())
3226 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3230 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
// Subtraction.
3234 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3236 if(CPUID::supportsMMX2())
3238 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3242 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
// Multiplication (x86::pmullw on the MMX2 path).
3246 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3248 if(CPUID::supportsMMX2())
3250 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3254 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
// Bitwise AND.
3258 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3260 if(CPUID::supportsMMX2())
3262 return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
3266 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
// Bitwise OR.
3270 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3272 if(CPUID::supportsMMX2())
3274 return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
3278 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
// Bitwise XOR.
3282 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3284 if(CPUID::supportsMMX2())
3286 return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
3290 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts of all UShort4 lanes by an immediate count.
// NOTE(review): unlike the arithmetic operators for this type, no CPUID check or
// generic fallback is visible here; the x86 helper is called unconditionally.
// Left shift: reinterprets as Short4 and calls x86::psllw.
3294 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3296 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3298 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
// Right shift via x86::psrlw, called directly on the UShort4 operand.
3301 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3303 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3305 return x86::psrlw(lhs, rhs);
// Compound forms: compute the shift and assign the result back to lhs.
3308 RValue<UShort4> operator<<=(UShort4 &lhs, unsigned char rhs)
3310 return lhs = lhs << rhs;
3313 RValue<UShort4> operator>>=(UShort4 &lhs, unsigned char rhs)
3315 return lhs = lhs >> rhs;
// Bitwise complement of a UShort4.
// MMX2 path: XOR (as Short4) with a constant whose lanes are all 0xFFFF.
// Generic path: Nucleus::createNot on the raw value.
3318 RValue<UShort4> operator~(RValue<UShort4> val)
3320 if(CPUID::supportsMMX2())
3322 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3326 return RValue<UShort4>(Nucleus::createNot(val.value));
// Lane-wise maximum of two UShort4 values, built on the Short4 overload of Max:
// 0x8000 is subtracted from every lane of both operands, Max is taken on the
// Short4 reinterpretation, and 0x8000 is added back to the result.
// Presumably this bias makes the signed Short4 comparison order the lanes as
// unsigned values - confirm against the Short4 Max implementation.
3330 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3332 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
// Lane-wise minimum of two UShort4 values, using the same 0x8000 bias around
// the Short4 overload of Min.
3335 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3337 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
// UShort4 helpers that forward directly to an x86 helper of the matching name.
// No generic fallback is visible for any of them.
// Saturating add (x86::paddusw).
3340 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3342 return x86::paddusw(x, y);
// Saturating subtract (x86::psubusw).
3345 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3347 return x86::psubusw(x, y);
// High half of the lane-wise product (x86::pmulhuw).
3350 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3352 return x86::pmulhuw(x, y);
// Lane-wise average (x86::pavgw); rounding behaviour is defined by that helper.
3355 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3357 return x86::pavgw(x, y);
// Packs two UShort4 operands into one Byte8 (x86::packuswb).
3360 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3362 return x86::packuswb(x, y);
// Backend type used to represent UShort4: the MMX type when MMX2 is supported,
// otherwise a vector of four UShort elements.
3365 Type *UShort4::getType()
3367 if(CPUID::supportsMMX2())
3369 return MMX::getType();
3373 return T(VectorType::get(UShort::getType(), 4));
// Short8 constructors.
// Constant constructor: c0..c7 become lanes 0..7 of a constant vector.
3377 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3379 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3380 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Initializes from a Short8 r-value.
3383 Short8::Short8(RValue<Short8> rhs)
3385 storeValue(rhs.value);
// Initializes from a reference to a Short8.
// NOTE(review): the statement storing `value` is not visible in this listing.
3388 Short8::Short8(const Reference<Short8> &rhs)
3390 Value *value = rhs.loadValue();
// Concatenates two Short4 halves: each half is bitcast to a 64-bit Long, the two
// Longs are inserted as elements 0 (lo) and 1 (hi) of a 2 x Long vector, and that
// vector is bitcast to the Short8 type.
// NOTE(review): the final store of `short8` is not visible in this listing.
3394 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3396 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3397 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3399 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3400 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3401 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3402 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
// Lane-wise operators on Short8.
// Addition and AND use generic Nucleus instructions.
3407 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3409 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3412 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3414 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
// The shifts and MulAdd call x86 helpers unconditionally; the existing FIXMEs
// record that a non-x86 fallback is still missing.
3417 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3419 return x86::psllw(lhs, rhs); // FIXME: Fallback required
// Right shift uses x86::psraw (presumably arithmetic, matching the signed lane type).
3422 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3424 return x86::psraw(lhs, rhs); // FIXME: Fallback required
// Multiply-add of two Short8 operands producing an Int4 (x86::pmaddwd).
3427 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3429 return x86::pmaddwd(x, y); // FIXME: Fallback required
3432 RValue<Int4> Abs(RValue<Int4> x)
3434 if(CPUID::supportsSSSE3())
3436 return x86::pabsd(x);
3440 Int4 mask = (x >> 31);
3441 return (mask ^ x) - mask;
// High half of the lane-wise Short8 product, via x86::pmulhw; the existing FIXME
// records that a non-x86 fallback is still missing.
3445 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3447 return x86::pmulhw(x, y); // FIXME: Fallback required
// Backend type of Short8: always a vector of eight Short elements (no MMX variant).
3450 Type *Short8::getType()
3452 return T(VectorType::get(Short::getType(), 8));
// UShort8 constructors.
// Constant constructor: c0..c7 become lanes 0..7 of a constant vector.
3455 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3457 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3458 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Initializes from a UShort8 r-value.
3461 UShort8::UShort8(RValue<UShort8> rhs)
3463 storeValue(rhs.value);
// Initializes from a reference to a UShort8.
// NOTE(review): the statement storing `value` is not visible in this listing.
3466 UShort8::UShort8(const Reference<UShort8> &rhs)
3468 Value *value = rhs.loadValue();
// Concatenates two UShort4 halves through a 2 x Long vector (lo is element 0, hi is element 1).
// NOTE(review): the final bitcast targets Short8::getType() rather than
// UShort8::getType(); presumably both are the same backend type - confirm.
// The final store of `short8` is not visible in this listing.
3472 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3474 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3475 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3477 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3478 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3479 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3480 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
// Assignment overloads for UShort8; each yields the assigned value as an r-value.
// NOTE(review): the `return` of the first overload and the storeValue(value) calls
// after each load are not visible in this listing.
3485 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs)
3487 storeValue(rhs.value);
// Assign from another UShort8 variable.
3492 RValue<UShort8> UShort8::operator=(const UShort8 &rhs)
3494 Value *value = rhs.loadValue();
3497 return RValue<UShort8>(value);
// Assign from a reference to a UShort8.
3500 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs)
3502 Value *value = rhs.loadValue();
3505 return RValue<UShort8>(value);
// Lane-wise operators on UShort8.
// Bitwise AND (generic instruction).
3508 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3510 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
// Left shift: reinterprets as Short8 and calls x86::psllw.
3513 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3515 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required
// Right shift via x86::psrlw on the unsigned operand.
3518 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3520 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
// Addition and multiplication (generic instructions).
3523 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3525 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3528 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3530 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
// Compound addition: computes lhs + rhs and assigns it back to lhs.
3533 RValue<UShort8> operator+=(UShort8 &lhs, RValue<UShort8> rhs)
3535 return lhs = lhs + rhs;
// Bitwise complement (generic instruction).
3538 RValue<UShort8> operator~(RValue<UShort8> val)
3540 return RValue<UShort8>(Nucleus::createNot(val.value));
// Rearranges the eight lanes of a UShort8 according to select0..select7.
// The visible part performs the rearrangement as a byte-level shuffle: the operand
// is bitcast to Byte16, shuffled with itself using the `pshufb` index table, and
// bitcast back to UShort8.
// NOTE(review): the construction of `pshufb` from the select arguments is not
// visible in this listing - confirm the index layout against the full file.
3543 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3565 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3566 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
3567 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3569 return RValue<UShort8>(short8);
// High half of the lane-wise UShort8 product, via x86::pmulhuw; the existing FIXME
// records that a non-x86 fallback is still missing.
3572 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3574 return x86::pmulhuw(x, y); // FIXME: Fallback required
// Backend type of UShort8: always a vector of eight UShort elements.
3577 Type *UShort8::getType()
3579 return T(VectorType::get(UShort::getType(), 8));
// Int constructors.
// From a function argument: stores the argument's value.
3582 Int::Int(Argument<Int> argument)
3584 storeValue(argument.value);
// From Byte: zero-extended to the Int type.
3587 Int::Int(RValue<Byte> cast)
3589 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3591 storeValue(integer);
// From SByte: sign-extended.
3594 Int::Int(RValue<SByte> cast)
3596 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3598 storeValue(integer);
// From Short: sign-extended.
3601 Int::Int(RValue<Short> cast)
3603 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3605 storeValue(integer);
// From UShort: zero-extended.
3608 Int::Int(RValue<UShort> cast)
3610 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3612 storeValue(integer);
// From Int2: takes element 0.
3615 Int::Int(RValue<Int2> cast)
3617 *this = Extract(cast, 0);
// From Long: truncated to the Int type.
3620 Int::Int(RValue<Long> cast)
3622 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3624 storeValue(integer);
// From Float: floating-point to signed-integer conversion.
3627 Int::Int(RValue<Float> cast)
3629 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3631 storeValue(integer);
// NOTE(review): the signature for the next statement is not visible in this
// listing; it stores a constant built from `x`, so it is presumably the body of a
// constructor taking an integer constant - confirm against the full file.
3640 storeValue(Nucleus::createConstantInt(x));
// From Int / UInt r-values: the value is stored unchanged.
3643 Int::Int(RValue<Int> rhs)
3645 storeValue(rhs.value);
3648 Int::Int(RValue<UInt> rhs)
3650 storeValue(rhs.value);
// From Int / UInt variables and references: loads the source value.
// NOTE(review): the statement storing `value` is not visible in this listing.
3653 Int::Int(const Int &rhs)
3655 Value *value = rhs.loadValue();
3659 Int::Int(const Reference<Int> &rhs)
3661 Value *value = rhs.loadValue();
3665 Int::Int(const UInt &rhs)
3667 Value *value = rhs.loadValue();
3671 Int::Int(const Reference<UInt> &rhs)
3673 Value *value = rhs.loadValue();
// Assignment overloads for Int; each yields the assigned value as an r-value.
// From a C++ int constant: stores a constant and wraps what storeValue returns.
3677 RValue<Int> Int::operator=(int rhs)
3679 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
// From an Int r-value.
// NOTE(review): the return statement is not visible in this listing.
3682 RValue<Int> Int::operator=(RValue<Int> rhs)
3684 storeValue(rhs.value);
// From a UInt r-value: same bits, re-wrapped as Int.
3689 RValue<Int> Int::operator=(RValue<UInt> rhs)
3691 storeValue(rhs.value);
3693 return RValue<Int>(rhs);
// From Int / UInt variables and references.
// NOTE(review): the storeValue(value) call after each load is not visible in this listing.
3696 RValue<Int> Int::operator=(const Int &rhs)
3698 Value *value = rhs.loadValue();
3701 return RValue<Int>(value);
3704 RValue<Int> Int::operator=(const Reference<Int> &rhs)
3706 Value *value = rhs.loadValue();
3709 return RValue<Int>(value);
3712 RValue<Int> Int::operator=(const UInt &rhs)
3714 Value *value = rhs.loadValue();
3717 return RValue<Int>(value);
3720 RValue<Int> Int::operator=(const Reference<UInt> &rhs)
3722 Value *value = rhs.loadValue();
3725 return RValue<Int>(value);
// Binary operators on Int. Each emits exactly one Nucleus instruction.
// Division, remainder and right shift use the signed forms (SDiv, SRem, AShr).
3728 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3730 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3733 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3735 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3738 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3740 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
// Signed division.
3743 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3745 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
// Signed remainder.
3748 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3750 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3753 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3755 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3758 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3760 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3763 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3765 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3768 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3770 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
// Arithmetic (sign-propagating) right shift.
3773 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3775 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators on Int. Each evaluates the corresponding binary
// operator on (lhs, rhs), assigns the result to lhs and returns the assigned value.
3778 RValue<Int> operator+=(Int &lhs, RValue<Int> rhs)
3780 return lhs = lhs + rhs;
3783 RValue<Int> operator-=(Int &lhs, RValue<Int> rhs)
3785 return lhs = lhs - rhs;
3788 RValue<Int> operator*=(Int &lhs, RValue<Int> rhs)
3790 return lhs = lhs * rhs;
3793 RValue<Int> operator/=(Int &lhs, RValue<Int> rhs)
3795 return lhs = lhs / rhs;
3798 RValue<Int> operator%=(Int &lhs, RValue<Int> rhs)
3800 return lhs = lhs % rhs;
3803 RValue<Int> operator&=(Int &lhs, RValue<Int> rhs)
3805 return lhs = lhs & rhs;
3808 RValue<Int> operator|=(Int &lhs, RValue<Int> rhs)
3810 return lhs = lhs | rhs;
3813 RValue<Int> operator^=(Int &lhs, RValue<Int> rhs)
3815 return lhs = lhs ^ rhs;
3818 RValue<Int> operator<<=(Int &lhs, RValue<Int> rhs)
3820 return lhs = lhs << rhs;
3823 RValue<Int> operator>>=(Int &lhs, RValue<Int> rhs)
3825 return lhs = lhs >> rhs;
// Unary operators on Int.
// NOTE(review): the body of unary plus is not visible in this listing
// (presumably it returns `val` unchanged - confirm).
3828 RValue<Int> operator+(RValue<Int> val)
// Arithmetic negation.
3833 RValue<Int> operator-(RValue<Int> val)
3835 return RValue<Int>(Nucleus::createNeg(val.value));
// Bitwise complement.
3838 RValue<Int> operator~(RValue<Int> val)
3840 return RValue<Int>(Nucleus::createNot(val.value));
// Increment / decrement operators on Int.
// NOTE(review): the return statements of all four functions are not visible in
// this listing. From the signatures, the postfix forms return an r-value and the
// prefix forms return a reference to the variable.
// Postfix ++: captures the current value in `res`, then stores res + 1 back.
3843 RValue<Int> operator++(Int &val, int) // Post-increment
3845 RValue<Int> res = val;
3847 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3848 val.storeValue(inc);
// Prefix ++: loads, adds 1 and stores back.
3853 const Int &operator++(Int &val) // Pre-increment
3855 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3856 val.storeValue(inc);
// Postfix --: as postfix ++ but subtracting 1 (the local is still named `inc`).
3861 RValue<Int> operator--(Int &val, int) // Post-decrement
3863 RValue<Int> res = val;
3865 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3866 val.storeValue(inc);
// Prefix --: loads, subtracts 1 and stores back.
3871 const Int &operator--(Int &val) // Pre-decrement
3873 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3874 val.storeValue(inc);
// Comparison operators on Int, each producing a Bool.
// The ordering comparisons use the signed predicates (SLT, SLE, SGT, SGE).
3879 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3881 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
3884 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3886 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
3889 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
3891 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
3894 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
3896 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
// Equality and inequality do not depend on signedness.
3899 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
3901 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
3904 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
3906 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
3909 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
3911 return IfThenElse(x > y, x, y);
3914 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
3916 return IfThenElse(x < y, x, y);
3919 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
3921 return Min(Max(x, min), max);
// Converts a Float to Int by delegating to the x86::cvtss2si helper.
// NOTE(review): the rounding mode is determined by that helper (and presumably
// the CPU's current rounding mode) - confirm before relying on a specific mode.
// The commented-out line is an older generic formulation kept for reference;
// it refers to `val`, which is not a parameter of this function.
3924 RValue<Int> RoundInt(RValue<Float> cast)
3926 return x86::cvtss2si(cast);
3928 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
// Backend type of Int: the 32-bit integer type of the file-level LLVM context.
3931 Type *Int::getType()
3933 return T(llvm::Type::getInt32Ty(*::context));
// Long constructors.
// From Int: sign-extended to 64 bits.
3936 Long::Long(RValue<Int> cast)
3938 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
3940 storeValue(integer);
// From UInt: zero-extended to 64 bits.
3943 Long::Long(RValue<UInt> cast)
3945 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
3947 storeValue(integer);
// From a Long r-value: stored unchanged.
3954 Long::Long(RValue<Long> rhs)
3956 storeValue(rhs.value);
// Assignment overloads for Long; each yields the assigned value as an r-value.
// From a 64-bit C++ constant.
3959 RValue<Long> Long::operator=(int64_t rhs)
3961 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
// From a Long r-value.
// NOTE(review): the return statement is not visible in this listing.
3964 RValue<Long> Long::operator=(RValue<Long> rhs)
3966 storeValue(rhs.value);
// From a Long variable or reference.
// NOTE(review): the storeValue(value) call after each load is not visible in this listing.
3971 RValue<Long> Long::operator=(const Long &rhs)
3973 Value *value = rhs.loadValue();
3976 return RValue<Long>(value);
3979 RValue<Long> Long::operator=(const Reference<Long> &rhs)
3981 Value *value = rhs.loadValue();
3984 return RValue<Long>(value);
// Arithmetic on Long: only addition and subtraction are defined in this span.
3987 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
3989 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
3992 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
3994 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
// Compound forms: compute the result and assign it back to lhs.
3997 RValue<Long> operator+=(Long &lhs, RValue<Long> rhs)
3999 return lhs = lhs + rhs;
4002 RValue<Long> operator-=(Long &lhs, RValue<Long> rhs)
4004 return lhs = lhs - rhs;
// Atomically adds y to the Long that x points to, via Nucleus::createAtomicAdd.
// NOTE(review): whether the returned value is the old or the new contents, and the
// memory ordering used, are decided inside createAtomicAdd - confirm there.
4007 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4009 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
// Backend type of Long: the 64-bit integer type of the file-level LLVM context.
4012 Type *Long::getType()
4014 return T(llvm::Type::getInt64Ty(*::context));
// UInt converting constructors.
// From a function argument: stores the argument's value.
4017 UInt::UInt(Argument<UInt> argument)
4019 storeValue(argument.value);
// From UShort: zero-extended.
4022 UInt::UInt(RValue<UShort> cast)
4024 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4026 storeValue(integer);
// From Long: truncated to the UInt type.
4029 UInt::UInt(RValue<Long> cast)
4031 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4033 storeValue(integer);
// Converts a Float to UInt using only signed conversions (see the note below on
// why createFPToUI is avoided).
// Structure of the stored expression, as far as it is visible:
//   ~(As<Int>(cast) >> 31)  - a mask built from the float's sign bit that zeroes
//                             the result for negative inputs;
//   IfThenElse(cast >= 2^31, <biased conversion>, <plain conversion>)
// where the biased conversion subtracts 2^31 before converting and adds it back
// afterwards as an unsigned integer.
// NOTE(review): the final argument of IfThenElse and the closing of the expression
// are not visible in this listing - confirm against the full file.
4036 UInt::UInt(RValue<Float> cast)
4038 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
4039 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4041 // Smallest positive value representable in UInt, but not in Int
4042 const unsigned int ustart = 0x80000000u;
4043 const float ustartf = float(ustart);
4045 // If the value is negative, store 0, otherwise store the result of the conversion
4046 storeValue((~(As<Int>(cast) >> 31) &
4047 // Check if the value can be represented as an Int
4048 IfThenElse(cast >= ustartf,
4049 // If the value is too large, subtract ustart and re-add it after conversion.
4050 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4051 // Otherwise, just convert normally
// Remaining UInt constructors.
// NOTE(review): the signature for the next statement is not visible in this
// listing; it stores a constant built from `x`, so it is presumably the body of a
// constructor taking an integer constant - confirm against the full file.
4061 storeValue(Nucleus::createConstantInt(x));
// From an unsigned C++ constant.
4064 UInt::UInt(unsigned int x)
4066 storeValue(Nucleus::createConstantInt(x));
// From UInt / Int r-values: the value is stored unchanged.
4069 UInt::UInt(RValue<UInt> rhs)
4071 storeValue(rhs.value);
4074 UInt::UInt(RValue<Int> rhs)
4076 storeValue(rhs.value);
// From UInt / Int variables and references: loads the source value.
// NOTE(review): the statement storing `value` is not visible in this listing.
4079 UInt::UInt(const UInt &rhs)
4081 Value *value = rhs.loadValue();
4085 UInt::UInt(const Reference<UInt> &rhs)
4087 Value *value = rhs.loadValue();
4091 UInt::UInt(const Int &rhs)
4093 Value *value = rhs.loadValue();
4097 UInt::UInt(const Reference<Int> &rhs)
4099 Value *value = rhs.loadValue();
// Assignment overloads for UInt; each yields the assigned value as an r-value.
// From an unsigned C++ constant.
4103 RValue<UInt> UInt::operator=(unsigned int rhs)
4105 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
// From a UInt r-value.
// NOTE(review): the return statement is not visible in this listing.
4108 RValue<UInt> UInt::operator=(RValue<UInt> rhs)
4110 storeValue(rhs.value);
// From an Int r-value: same bits, re-wrapped as UInt.
4115 RValue<UInt> UInt::operator=(RValue<Int> rhs)
4117 storeValue(rhs.value);
4119 return RValue<UInt>(rhs);
// From UInt / Int variables and references.
// NOTE(review): the storeValue(value) call after each load is not visible in this listing.
4122 RValue<UInt> UInt::operator=(const UInt &rhs)
4124 Value *value = rhs.loadValue();
4127 return RValue<UInt>(value);
4130 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs)
4132 Value *value = rhs.loadValue();
4135 return RValue<UInt>(value);
4138 RValue<UInt> UInt::operator=(const Int &rhs)
4140 Value *value = rhs.loadValue();
4143 return RValue<UInt>(value);
4146 RValue<UInt> UInt::operator=(const Reference<Int> &rhs)
4148 Value *value = rhs.loadValue();
4151 return RValue<UInt>(value);
// Binary operators on UInt. Each emits exactly one Nucleus instruction.
// Division, remainder and right shift use the unsigned forms (UDiv, URem, LShr).
4154 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4156 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4159 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4161 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4164 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4166 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
// Unsigned division.
4169 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4171 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
// Unsigned remainder.
4174 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4176 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4179 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4181 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4184 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4186 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4189 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4191 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4194 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4196 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
// Logical (zero-filling) right shift.
4199 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4201 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators on UInt. Each evaluates the corresponding binary
// operator on (lhs, rhs), assigns the result to lhs and returns the assigned value.
4204 RValue<UInt> operator+=(UInt &lhs, RValue<UInt> rhs)
4206 return lhs = lhs + rhs;
4209 RValue<UInt> operator-=(UInt &lhs, RValue<UInt> rhs)
4211 return lhs = lhs - rhs;
4214 RValue<UInt> operator*=(UInt &lhs, RValue<UInt> rhs)
4216 return lhs = lhs * rhs;
4219 RValue<UInt> operator/=(UInt &lhs, RValue<UInt> rhs)
4221 return lhs = lhs / rhs;
4224 RValue<UInt> operator%=(UInt &lhs, RValue<UInt> rhs)
4226 return lhs = lhs % rhs;
4229 RValue<UInt> operator&=(UInt &lhs, RValue<UInt> rhs)
4231 return lhs = lhs & rhs;
4234 RValue<UInt> operator|=(UInt &lhs, RValue<UInt> rhs)
4236 return lhs = lhs | rhs;
4239 RValue<UInt> operator^=(UInt &lhs, RValue<UInt> rhs)
4241 return lhs = lhs ^ rhs;
4244 RValue<UInt> operator<<=(UInt &lhs, RValue<UInt> rhs)
4246 return lhs = lhs << rhs;
4249 RValue<UInt> operator>>=(UInt &lhs, RValue<UInt> rhs)
4251 return lhs = lhs >> rhs;
// Unary operators on UInt.
// NOTE(review): the body of unary plus is not visible in this listing
// (presumably it returns `val` unchanged - confirm).
4254 RValue<UInt> operator+(RValue<UInt> val)
// Negation, emitted with Nucleus::createNeg.
4259 RValue<UInt> operator-(RValue<UInt> val)
4261 return RValue<UInt>(Nucleus::createNeg(val.value));
// Bitwise complement.
4264 RValue<UInt> operator~(RValue<UInt> val)
4266 return RValue<UInt>(Nucleus::createNot(val.value));
// Increment / decrement operators on UInt.
// NOTE(review): the return statements of all four functions are not visible in
// this listing. From the signatures, the postfix forms return an r-value and the
// prefix forms return a reference to the variable.
// Postfix ++: captures the current value in `res`, then stores res + 1 back.
4269 RValue<UInt> operator++(UInt &val, int) // Post-increment
4271 RValue<UInt> res = val;
4273 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
4274 val.storeValue(inc);
// Prefix ++: loads, adds 1 and stores back.
4279 const UInt &operator++(UInt &val) // Pre-increment
4281 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
4282 val.storeValue(inc);
// Postfix --: as postfix ++ but subtracting 1 (the local is still named `inc`).
4287 RValue<UInt> operator--(UInt &val, int) // Post-decrement
4289 RValue<UInt> res = val;
4291 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4292 val.storeValue(inc);
// Prefix --: loads, subtracts 1 and stores back.
4297 const UInt &operator--(UInt &val) // Pre-decrement
4299 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4300 val.storeValue(inc);
4305 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4307 return IfThenElse(x > y, x, y);
4310 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4312 return IfThenElse(x < y, x, y);
4315 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4317 return Min(Max(x, min), max);
// Comparison operators on UInt, each producing a Bool.
// The ordering comparisons use the unsigned predicates (ULT, ULE, UGT, UGE).
4320 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4322 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4325 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4327 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4330 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4332 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4335 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4337 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
// Equality and inequality do not depend on signedness.
4340 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4342 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4345 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4347 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4350 // RValue<UInt> RoundUInt(RValue<Float> cast)
4352 // return x86::cvtss2si(val); // FIXME: Unsigned
4354 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
// Backend type of UInt: the same 32-bit integer type of the file-level LLVM
// context that Int::getType() returns.
4357 Type *UInt::getType()
4359 return T(llvm::Type::getInt32Ty(*::context));
4362 // Int2::Int2(RValue<Int> cast)
4364 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4365 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4367 // int shuffle[2] = {0, 0};
4368 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4370 // storeValue(replicate);
// Builds an Int2 from the low 64 bits of an Int4: the Int4 is viewed as a
// 2 x Long vector, element 0 is extracted, and that Long is bitcast to the Int2 type.
// NOTE(review): the statement storing `int2` is not visible in this listing.
4373 Int2::Int2(RValue<Int4> cast)
4375 Value *long2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
4376 Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0);
4377 Value *int2 = Nucleus::createBitCast(element, Int2::getType());
// Remaining simple Int2 constructors.
// NOTE(review): the next line has no visible signature in this listing; it presumably
// belongs to a default constructor - confirm against the full file.
4384 // xy.parent = this;
// Constant constructor: x is element 0, y is element 1.
4387 Int2::Int2(int x, int y)
4389 // xy.parent = this;
4391 int64_t constantVector[2] = {x, y};
4392 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Int::getType(), 2))));
// Bitcast because getType() may be the MMX type rather than a 2 x Int vector.
4394 storeValue(Nucleus::createBitCast(vector, getType()));
// From an Int2 r-value.
4397 Int2::Int2(RValue<Int2> rhs)
4399 // xy.parent = this;
4401 storeValue(rhs.value);
// From an Int2 variable or reference: loads the source value.
// NOTE(review): the statement storing `value` is not visible in this listing.
4404 Int2::Int2(const Int2 &rhs)
4406 // xy.parent = this;
4408 Value *value = rhs.loadValue();
4412 Int2::Int2(const Reference<Int2> &rhs)
4414 // xy.parent = this;
4416 Value *value = rhs.loadValue();
// Builds an Int2 from two scalar Ints (lo becomes element 0, hi element 1).
4420 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4422 if(CPUID::supportsMMX2())
// MMX2 path: each scalar is placed in element 0 of its own 2 x Int vector with
// element 1 zeroed, and the two vectors are combined with UnpackLow.
4426 // punpckldq mm0, mm1
4428 Value *loLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), lo.value, 0);
4429 loLong = Nucleus::createInsertElement(loLong, V(ConstantInt::get(Int::getType(), 0)), 1);
4430 Value *hiLong = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), hi.value, 0);
4431 hiLong = Nucleus::createInsertElement(hiLong, V(ConstantInt::get(Int::getType(), 0)), 1);
4433 storeValue(As<Int2>(UnpackLow(As<Int2>(loLong), As<Int2>(hiLong))).value);
// Generic path (presumably the else branch; the `else` line is not visible here):
// each scalar is viewed as a 1 x Int vector and the two are concatenated by a
// shuffle with indices 0 and 1.
4437 int shuffle[2] = {0, 1};
4438 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), shuffle);
4440 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
// Assignment overloads for Int2; each yields the assigned value as an r-value.
// NOTE(review): the `return` of the first overload and the storeValue(value) calls
// after each load are not visible in this listing.
4444 RValue<Int2> Int2::operator=(RValue<Int2> rhs)
4446 storeValue(rhs.value);
// Assign from another Int2 variable.
4451 RValue<Int2> Int2::operator=(const Int2 &rhs)
4453 Value *value = rhs.loadValue();
4456 return RValue<Int2>(value);
// Assign from a reference to an Int2.
4459 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs)
4461 Value *value = rhs.loadValue();
4464 return RValue<Int2>(value);
// Lane-wise addition and subtraction on Int2: the x86 helper (paddd / psubd)
// is used when MMX2 is supported, otherwise the generic Nucleus instruction.
4467 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4469 if(CPUID::supportsMMX2())
4471 return x86::paddd(lhs, rhs);
4475 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4479 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4481 if(CPUID::supportsMMX2())
4483 return x86::psubd(lhs, rhs);
4487 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4491 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4493 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4496 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4498 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4501 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4503 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
// Bitwise operators on Int2. On the MMX2 path the operands are reinterpreted as
// Short4 so the Short4-typed x86 helpers (pand / por / pxor) can be reused;
// otherwise the generic Nucleus instruction is emitted.
4506 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4508 if(CPUID::supportsMMX2())
4510 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4514 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4518 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4520 if(CPUID::supportsMMX2())
4522 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4526 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4530 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4532 if(CPUID::supportsMMX2())
4534 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4538 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts of both Int2 lanes by an immediate count, via x86 helpers.
// NOTE(review): no CPUID check or generic fallback is visible here; the
// commented-out generic forms are kept for reference only.
4542 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4544 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4546 return x86::pslld(lhs, rhs);
// Right shift uses x86::psrad (the commented generic form is an arithmetic shift).
4549 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4551 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4553 return x86::psrad(lhs, rhs);
// Compound addition / subtraction on Int2: compute, assign back to lhs, and
// return the assigned value.
4556 RValue<Int2> operator+=(Int2 &lhs, RValue<Int2> rhs)
4558 return lhs = lhs + rhs;
4561 RValue<Int2> operator-=(Int2 &lhs, RValue<Int2> rhs)
4563 return lhs = lhs - rhs;
4566 // RValue<Int2> operator*=(Int2 &lhs, RValue<Int2> rhs)
4568 // return lhs = lhs * rhs;
4571 // RValue<Int2> operator/=(Int2 &lhs, RValue<Int2> rhs)
4573 // return lhs = lhs / rhs;
4576 // RValue<Int2> operator%=(Int2 &lhs, RValue<Int2> rhs)
4578 // return lhs = lhs % rhs;
// Compound bitwise and shift assignments on Int2: compute with the matching binary
// operator, assign back to lhs, and return the assigned value.
4581 RValue<Int2> operator&=(Int2 &lhs, RValue<Int2> rhs)
4583 return lhs = lhs & rhs;
4586 RValue<Int2> operator|=(Int2 &lhs, RValue<Int2> rhs)
4588 return lhs = lhs | rhs;
4591 RValue<Int2> operator^=(Int2 &lhs, RValue<Int2> rhs)
4593 return lhs = lhs ^ rhs;
// Shift counts are immediates, matching the binary shift operators.
4596 RValue<Int2> operator<<=(Int2 &lhs, unsigned char rhs)
4598 return lhs = lhs << rhs;
4601 RValue<Int2> operator>>=(Int2 &lhs, unsigned char rhs)
4603 return lhs = lhs >> rhs;
4606 // RValue<Int2> operator+(RValue<Int2> val)
4611 // RValue<Int2> operator-(RValue<Int2> val)
4613 // return RValue<Int2>(Nucleus::createNeg(val.value));
// Bitwise complement of an Int2.
// MMX2 path: XOR with a constant whose two lanes are both 0xFFFFFFFF.
// Generic path: Nucleus::createNot on the raw value.
4616 RValue<Int2> operator~(RValue<Int2> val)
4618 if(CPUID::supportsMMX2())
4620 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4624 return RValue<Int2>(Nucleus::createNot(val.value));
// Combines the low 32-bit lanes of x and y; the result is typed as Short4.
// With MMX2 available the x86::punpckldq helper is used.
4628 RValue<Short4> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4630 if(CPUID::supportsMMX2())
4632 return x86::punpckldq(x, y);
// Generic path: indices 0,2 pick x0,y0, assuming LLVM shufflevector index semantics.
4636 int shuffle[2] = {0, 2};
4637 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4639 return As<Short4>(packed);
// Combines the high 32-bit lanes of x and y; the result is typed as Short4.
4643 RValue<Short4> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4645 if(CPUID::supportsMMX2())
4647 return x86::punpckhdq(x, y);
// Generic path: indices 1,3 pick x1,y1 (same assumption as above).
4651 int shuffle[2] = {1, 3};
4652 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4654 return As<Short4>(packed);
// Returns one 32-bit element of an Int2.
// The direct extractelement form is deliberately disabled with if(false); the
// existing FIXME gives the reason.
// NOTE(review): the condition that selects between the two remaining paths is not
// visible in this listing (presumably a test on `i`) - confirm against the full file.
4658 RValue<Int> Extract(RValue<Int2> val, int i)
4660 if(false) // FIXME: LLVM does not generate optimal code
4662 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
// Path 1: view the value as a 2 x Int vector and extract element 0.
4668 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), Int::getType(), 0));
// Path 2: move the high element into the low position with UnpackHigh(val, val),
// then recurse to extract element 0.
4672 Int2 val2 = As<Int2>(UnpackHigh(val, val));
4674 return Extract(val2, 0);
// Returns a copy of val with element i replaced by `element`.
// The value is bitcast to a 2 x Int vector for the insertion and bitcast back
// to Int2::getType() afterwards, since Int2 may be represented by the MMX type.
4679 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4681 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType()));
// Returns the backend type used to represent Int2: the MMX type when
// CPUID::supportsMMX2() reports true, and a 2 x Int vector type in the other
// return.
4684 Type *Int2::getType()
4686 if(CPUID::supportsMMX2())
4688 return MMX::getType();
4692 return T(VectorType::get(Int::getType(), 2));
4698 // xy.parent = this;
// Constructs a UInt2 holding the constants (x, y).
// The constant is first created as a 2 x UInt vector and then bitcast to
// UInt2::getType() before being stored, because getType() may be the MMX type.
4701 UInt2::UInt2(unsigned int x, unsigned int y)
4703 // xy.parent = this;
// Values are widened to int64_t, the element type createConstantVector takes here.
4705 int64_t constantVector[2] = {x, y};
4706 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UInt::getType(), 2))));
4708 storeValue(Nucleus::createBitCast(vector, getType()));
// Constructs a UInt2 variable initialized from an rvalue.
4711 UInt2::UInt2(RValue<UInt2> rhs)
4713 // xy.parent = this;
4715 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs.
// NOTE(review): the statement that stores `value` is not visible in this excerpt.
4718 UInt2::UInt2(const UInt2 &rhs)
4720 // xy.parent = this;
4722 Value *value = rhs.loadValue();
// Constructs from a Reference<UInt2>: reads the referenced value.
// NOTE(review): the statement that stores `value` is not visible in this excerpt.
4726 UInt2::UInt2(const Reference<UInt2> &rhs)
4728 // xy.parent = this;
4730 Value *value = rhs.loadValue();
// Assigns an rvalue to this variable by storing its value.
// NOTE(review): the return statement is not visible in this excerpt.
4734 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs)
4736 storeValue(rhs.value);
// Assigns from another UInt2 variable; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
4741 RValue<UInt2> UInt2::operator=(const UInt2 &rhs)
4743 Value *value = rhs.loadValue();
4746 return RValue<UInt2>(value);
// Assigns from a Reference<UInt2>; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
4749 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs)
4751 Value *value = rhs.loadValue();
4754 return RValue<UInt2>(value);
// Element-wise addition of two UInt2 values.
// With MMX2 support the operands are reinterpreted as Int2 and added with
// x86::paddd; the other return emits Nucleus::createAdd.
4757 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4759 if(CPUID::supportsMMX2())
4761 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
4765 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
// Element-wise subtraction of two UInt2 values.
// With MMX2 support the operands are reinterpreted as Int2 and subtracted with
// x86::psubd; the other return emits Nucleus::createSub.
4769 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4771 if(CPUID::supportsMMX2())
4773 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
4777 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4781 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4783 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4786 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4788 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4791 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4793 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
// Bitwise AND of two UInt2 values.
// With MMX2 support the operands are reinterpreted as Short4 for x86::pand;
// the other return emits Nucleus::createAnd.
4796 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4798 if(CPUID::supportsMMX2())
4800 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4804 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
// Bitwise OR of two UInt2 values (x86::por with MMX2 support, else createOr).
4808 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4810 if(CPUID::supportsMMX2())
4812 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4816 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
// Bitwise XOR of two UInt2 values (x86::pxor with MMX2 support, else createXor).
4820 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4822 if(CPUID::supportsMMX2())
4824 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4828 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
// Shifts both elements left by a shift count known when the routine is built.
// Always uses x86::pslld on the value reinterpreted as Int2; the generic
// createShl form is left commented out.
4832 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4834 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4836 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
// Shifts both elements right by a shift count known when the routine is built.
// Always uses x86::psrld; the generic createLShr form is left commented out.
4839 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4841 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4843 return x86::psrld(lhs, rhs);
// Compound assignment operators for UInt2. Each one evaluates the matching
// binary operator on the current value of lhs and on rhs, assigns the result
// back to lhs, and returns the result of that assignment.
// The *=, /= and %= forms are commented out.
4846 RValue<UInt2> operator+=(UInt2 &lhs, RValue<UInt2> rhs)
4848 return lhs = lhs + rhs;
4851 RValue<UInt2> operator-=(UInt2 &lhs, RValue<UInt2> rhs)
4853 return lhs = lhs - rhs;
4856 // RValue<UInt2> operator*=(UInt2 &lhs, RValue<UInt2> rhs)
4858 // return lhs = lhs * rhs;
4861 // RValue<UInt2> operator/=(UInt2 &lhs, RValue<UInt2> rhs)
4863 // return lhs = lhs / rhs;
4866 // RValue<UInt2> operator%=(UInt2 &lhs, RValue<UInt2> rhs)
4868 // return lhs = lhs % rhs;
4871 RValue<UInt2> operator&=(UInt2 &lhs, RValue<UInt2> rhs)
4873 return lhs = lhs & rhs;
4876 RValue<UInt2> operator|=(UInt2 &lhs, RValue<UInt2> rhs)
4878 return lhs = lhs | rhs;
4881 RValue<UInt2> operator^=(UInt2 &lhs, RValue<UInt2> rhs)
4883 return lhs = lhs ^ rhs;
// Shift-assign forms take a shift count known when the routine is built.
4886 RValue<UInt2> operator<<=(UInt2 &lhs, unsigned char rhs)
4888 return lhs = lhs << rhs;
4891 RValue<UInt2> operator>>=(UInt2 &lhs, unsigned char rhs)
4893 return lhs = lhs >> rhs;
4896 // RValue<UInt2> operator+(RValue<UInt2> val)
4901 // RValue<UInt2> operator-(RValue<UInt2> val)
4903 // return RValue<UInt2>(Nucleus::createNeg(val.value));
// Bitwise complement of a UInt2.
// When CPUID::supportsMMX2() reports true the complement is formed by XOR-ing
// with an all-ones UInt2 constant; the other return emits Nucleus::createNot.
4906 RValue<UInt2> operator~(RValue<UInt2> val)
4908 if(CPUID::supportsMMX2())
4910 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
4914 return RValue<UInt2>(Nucleus::createNot(val.value));
// Returns the backend type used to represent UInt2: the MMX type when
// CPUID::supportsMMX2() reports true, and a 2 x UInt vector type in the other
// return.
4918 Type *UInt2::getType()
4920 if(CPUID::supportsMMX2())
4922 return MMX::getType();
4926 return T(VectorType::get(UInt::getType(), 2));
// Constructs an Int4 by widening the four bytes of a Byte4 to 32-bit elements
// with zero extension.
// NOTE(review): the declaration of `e` and the final store of `f` are not
// visible in this excerpt.
4930 Int4::Int4(RValue<Byte4> cast)
// Place the 4 packed bytes into element 0 of an otherwise undefined Int4.
4932 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4933 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
// SSE4.1 path: a single pmovzxbd performs the zero extension.
4937 if (CPUID::supportsSSE4_1())
4939 e = x86::pmovzxbd(RValue<Int4>(a)).value;
// Other path: interleave the bytes with zero bytes, then interleave the
// resulting 16-bit elements with zero elements. Under shufflevector numbering,
// indices of 16 and above (8 and above for swizzle2) select from the second
// operand, which is the null vector.
4943 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
4944 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4945 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
4947 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
4948 Value *d = Nucleus::createBitCast(c, Short8::getType());
4949 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
4952 Value *f = Nucleus::createBitCast(e, Int4::getType());
// Constructs an Int4 by widening the four bytes of an SByte4 to 32-bit elements
// with sign extension.
// NOTE(review): the declaration of `g` and the final store are not visible in
// this excerpt.
4956 Int4::Int4(RValue<SByte4> cast)
// Place the 4 packed bytes into element 0 of an otherwise undefined Int4.
4958 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
4959 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
// SSE4.1 path: a single pmovsxbd performs the sign extension.
4963 if (CPUID::supportsSSE4_1())
4965 g = x86::pmovsxbd(RValue<Int4>(a)).value;
// Other path: duplicate each byte into a 16-bit element, then duplicate each
// 16-bit element into a 32-bit element, so that each source byte ends up in the
// top 8 bits of its 32-bit element.
4969 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
4970 Value *b = Nucleus::createBitCast(a, Byte16::getType());
4971 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
4973 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
4974 Value *d = Nucleus::createBitCast(c, Short8::getType());
4975 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
4977 Value *f = Nucleus::createBitCast(e, Int4::getType());
4978 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
// An arithmetic right shift by 24 leaves the sign-extended byte in each element.
4979 g = x86::psrad(RValue<Int4>(f), 24).value;
// Constructs an Int4 from a Float4 using a floating-point to signed integer
// conversion (Nucleus::createFPToSI).
// NOTE(review): the statement that stores `xyzw` is not visible in this excerpt.
4985 Int4::Int4(RValue<Float4> cast)
4987 // xyzw.parent = this;
4989 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
// Constructs an Int4 by widening the four 16-bit elements of a Short4 to 32-bit
// elements with sign extension.
4994 Int4::Int4(RValue<Short4> cast)
// Place the 64-bit Short4 payload into the low half of a 128-bit vector.
4996 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
4997 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
4998 long2 = Nucleus::createInsertElement(long2, element, 0);
4999 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
// SSE4.1 path: a single pmovsxwd performs the sign extension.
5001 if(CPUID::supportsSSE4_1())
5003 storeValue(x86::pmovsxwd(vector).value);
// Other path: duplicate each 16-bit element into both halves of a 32-bit element.
5007 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5009 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5010 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5011 Value *d = Nucleus::createBitCast(c, Int4::getType());
5014 // Each Short is packed into each Int in the (Short | Short) format.
5015 // Shifting by 16 will retrieve the original Short value.
5016 // Shifting an Int will propagate the sign bit, which will work
5017 // for both positive and negative values of a Short.
// NOTE(review): the store of `d` and the shift described above are not visible
// in this excerpt.
// Constructs an Int4 by widening the four 16-bit elements of a UShort4 to
// 32-bit elements with zero extension.
5022 Int4::Int4(RValue<UShort4> cast)
// Place the 64-bit UShort4 payload into the low half of a 128-bit vector.
5024 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5025 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5026 long2 = Nucleus::createInsertElement(long2, element, 0);
5027 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
// SSE4.1 path: a single pmovzxwd performs the zero extension.
5029 if(CPUID::supportsSSE4_1())
5031 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
// Other path: interleave the 16-bit elements with elements of a null vector
// (indices 8 and above select from the second operand under shufflevector
// numbering).
// NOTE(review): the statement that stores `d` is not visible in this excerpt.
5035 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5037 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5038 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle);
5039 Value *d = Nucleus::createBitCast(c, Int4::getType());
5046 // xyzw.parent = this;
// Constant constructors for Int4. With fewer than four arguments the last
// argument is replicated into the remaining elements, as the parameter names
// (xyzw, yzw, zw) indicate. All of them forward to constant().
5049 Int4::Int4(int xyzw)
5051 constant(xyzw, xyzw, xyzw, xyzw);
5054 Int4::Int4(int x, int yzw)
5056 constant(x, yzw, yzw, yzw);
5059 Int4::Int4(int x, int y, int zw)
5061 constant(x, y, zw, zw);
5064 Int4::Int4(int x, int y, int z, int w)
5066 constant(x, y, z, w);
// Stores the constant vector (x, y, z, w) into this variable. The values are
// widened to int64_t, the element type createConstantVector takes here.
5069 void Int4::constant(int x, int y, int z, int w)
5071 // xyzw.parent = this;
5073 int64_t constantVector[4] = {x, y, z, w};
5074 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Copy and conversion constructors for Int4. The UInt4 overloads take the
// source value as-is, with no conversion instruction visible here.
// NOTE(review): for the overloads that call loadValue(), the statement that
// stores `value` is not visible in this excerpt.
// From an Int4 rvalue.
5077 Int4::Int4(RValue<Int4> rhs)
5079 // xyzw.parent = this;
5081 storeValue(rhs.value);
// From another Int4 variable.
5084 Int4::Int4(const Int4 &rhs)
5086 // xyzw.parent = this;
5088 Value *value = rhs.loadValue();
// From a Reference<Int4>.
5092 Int4::Int4(const Reference<Int4> &rhs)
5094 // xyzw.parent = this;
5096 Value *value = rhs.loadValue();
// From a UInt4 rvalue.
5100 Int4::Int4(RValue<UInt4> rhs)
5102 // xyzw.parent = this;
5104 storeValue(rhs.value);
// From a UInt4 variable.
5107 Int4::Int4(const UInt4 &rhs)
5109 // xyzw.parent = this;
5111 Value *value = rhs.loadValue();
// From a Reference<UInt4>.
5115 Int4::Int4(const Reference<UInt4> &rhs)
5117 // xyzw.parent = this;
5119 Value *value = rhs.loadValue();
// Constructs an Int4 from two Int2 halves: lo becomes 64-bit element 0 and hi
// becomes 64-bit element 1 of a 2 x Long vector, which is then bitcast to the
// Int4 type.
// NOTE(review): the statement that stores `int4` is not visible in this excerpt.
5123 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5125 // xyzw.parent = this;
5127 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5128 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5130 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5131 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5132 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5133 Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
// Constructs an Int4 with all four elements equal to the scalar rhs.
5138 Int4::Int4(RValue<Int> rhs)
5140 // xyzw.parent = this;
// The variable's current contents are loaded only to have a vector to insert
// into; every element is overwritten by the swizzle below.
5142 Value *vector = loadValue();
5143 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
// Replicate element 0 into all four positions.
5145 int swizzle[4] = {0, 0, 0, 0};
5146 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5148 storeValue(replicate);
// Constructs from an Int variable by assigning its loaded value to *this.
// NOTE(review): which operator= overload this resolves to is not visible here.
5151 Int4::Int4(const Int &rhs)
5153 // xyzw.parent = this;
5155 *this = RValue<Int>(rhs.loadValue());
// Constructs from a Reference<Int> by assigning its loaded value to *this.
5158 Int4::Int4(const Reference<Int> &rhs)
5160 // xyzw.parent = this;
5162 *this = RValue<Int>(rhs.loadValue());
// Assigns an rvalue to this variable by storing its value.
// NOTE(review): the return statement is not visible in this excerpt.
5165 RValue<Int4> Int4::operator=(RValue<Int4> rhs)
5167 storeValue(rhs.value);
// Assigns from another Int4 variable; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5172 RValue<Int4> Int4::operator=(const Int4 &rhs)
5174 Value *value = rhs.loadValue();
5177 return RValue<Int4>(value);
// Assigns from a Reference<Int4>; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5180 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs)
5182 Value *value = rhs.loadValue();
5185 return RValue<Int4>(value);
// Element-wise binary operators for Int4. Each emits the corresponding Nucleus
// instruction on the two operand values; division and remainder use the signed
// forms (createSDiv / createSRem).
5188 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5190 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5193 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5195 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5198 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5200 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5203 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5205 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5208 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5210 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5213 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5215 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5218 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5220 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5223 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5225 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift operators for Int4.
// The unsigned char overloads take one shift count, known when the routine is
// built, for all elements and map to x86::pslld / x86::psrad.
5228 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5230 return x86::pslld(lhs, rhs);
5233 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5235 return x86::psrad(lhs, rhs);
// The Int4 overloads take per-element shift counts and emit createShl and
// createAShr (arithmetic right shift).
5238 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5240 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5243 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5245 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators for Int4. Each one evaluates the matching
// binary operator on the current value of lhs and on rhs, assigns the result
// back to lhs, and returns the result of that assignment.
// The /= and %= forms are commented out.
5248 RValue<Int4> operator+=(Int4 &lhs, RValue<Int4> rhs)
5250 return lhs = lhs + rhs;
5253 RValue<Int4> operator-=(Int4 &lhs, RValue<Int4> rhs)
5255 return lhs = lhs - rhs;
5258 RValue<Int4> operator*=(Int4 &lhs, RValue<Int4> rhs)
5260 return lhs = lhs * rhs;
5263 // RValue<Int4> operator/=(Int4 &lhs, RValue<Int4> rhs)
5265 // return lhs = lhs / rhs;
5268 // RValue<Int4> operator%=(Int4 &lhs, RValue<Int4> rhs)
5270 // return lhs = lhs % rhs;
5273 RValue<Int4> operator&=(Int4 &lhs, RValue<Int4> rhs)
5275 return lhs = lhs & rhs;
5278 RValue<Int4> operator|=(Int4 &lhs, RValue<Int4> rhs)
5280 return lhs = lhs | rhs;
5283 RValue<Int4> operator^=(Int4 &lhs, RValue<Int4> rhs)
5285 return lhs = lhs ^ rhs;
// Shift-assign forms take a shift count known when the routine is built.
5288 RValue<Int4> operator<<=(Int4 &lhs, unsigned char rhs)
5290 return lhs = lhs << rhs;
5293 RValue<Int4> operator>>=(Int4 &lhs, unsigned char rhs)
5295 return lhs = lhs >> rhs;
// Unary plus for Int4.
// NOTE(review): the body is not visible in this excerpt.
5298 RValue<Int4> operator+(RValue<Int4> val)
// Unary minus: element-wise negation via Nucleus::createNeg.
5303 RValue<Int4> operator-(RValue<Int4> val)
5305 return RValue<Int4>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
5308 RValue<Int4> operator~(RValue<Int4> val)
5310 return RValue<Int4>(Nucleus::createNot(val.value));
// Signed element-wise comparisons for Int4. Each returns an Int4 mask built by
// sign-extending the comparison result, so the intended element values are
// 0 (false) or ~0 (true).
// CmpEQ, CmpLE and CmpNLT work around the LLVM bug described in their FIXME
// comments: they emit the opposite comparison and invert the mask by XOR-ing
// with an all-ones constant.
// x == y, computed as NOT(x != y).
5313 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5315 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5316 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5317 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5318 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// x < y (signed).
5321 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5323 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
// x <= y (signed), computed as NOT(x > y).
5326 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5328 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5329 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5330 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5331 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// x != y.
5334 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5336 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// "Not less than": x >= y (signed), computed as NOT(x < y).
5339 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5341 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5342 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5343 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5344 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// "Not less than or equal": x > y (signed).
5347 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5349 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
// Element-wise signed maximum of two Int4 values.
// With SSE4.1 this maps to x86::pmaxsd. The other path builds a mask of the
// elements where x > y and selects x under the mask and y elsewhere; `&` binds
// tighter than `|`, so the expression reads (x & greater) | (y & ~greater).
5352 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5354 if(CPUID::supportsSSE4_1())
5356 return x86::pmaxsd(x, y);
5360 RValue<Int4> greater = CmpNLE(x, y);
5361 return x & greater | y & ~greater;
// Element-wise signed minimum of two Int4 values.
// With SSE4.1 this maps to x86::pminsd. The other path builds a mask of the
// elements where x < y and selects x under the mask and y elsewhere.
5365 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5367 if(CPUID::supportsSSE4_1())
5369 return x86::pminsd(x, y);
5373 RValue<Int4> less = CmpLT(x, y);
5374 return x & less | y & ~less;
// Converts a Float4 to Int4 using x86::cvtps2dq.
// NOTE(review): presumably rounds per the current MXCSR rounding mode, as the
// CVTPS2DQ instruction does - confirm against the x86 wrapper.
5378 RValue<Int4> RoundInt(RValue<Float4> cast)
5380 return x86::cvtps2dq(cast);
// Packs two Int4 values into one Short8 using x86::packssdw.
5383 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5385 return x86::packssdw(x, y);
// Returns element i of x as an Int.
5388 RValue<Int> Extract(RValue<Int4> x, int i)
5390 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
// Returns a copy of x with element i replaced by `element`.
5393 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5395 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
// Returns the result of x86::movmskps applied to x reinterpreted as Float4.
5398 RValue<Int> SignMask(RValue<Int4> x)
5400 return x86::movmskps(As<Float4>(x));
// Rearranges the elements of x according to `select`, using createSwizzle4.
// NOTE(review): the encoding of `select` is defined by createSwizzle4, which is
// not visible in this excerpt.
5403 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5405 return RValue<Int4>(createSwizzle4(x.value, select));
// Returns the backend type used to represent Int4: a 4 x Int vector type.
5408 Type *Int4::getType()
5410 return T(VectorType::get(Int::getType(), 4));
// Constructs a UInt4 from a Float4 with an unsigned conversion built from
// signed conversions, because createFPToUI is noted below as broken.
// Elements at or above 2^31 are converted as (value - 2^31) and have 2^31
// added back as an integer. Elements with the sign bit set produce 0.
5413 UInt4::UInt4(RValue<Float4> cast)
5415 // xyzw.parent = this;
5417 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
5418 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5420 // Smallest positive value representable in UInt, but not in Int
5421 const unsigned int ustart = 0x80000000u;
5422 const float ustartf = float(ustart);
5424 // Check if the value can be represented as an Int
5425 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5426 // If the value is too large, subtract ustart and re-add it after conversion.
5427 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5428 // Otherwise, just convert normally
5429 (~uiValue & Int4(cast));
5430 // If the value is negative, store 0, otherwise store the result of the conversion
// As<Int4>(cast) >> 31 is an arithmetic shift of the raw float bits, giving an
// all-ones element wherever the sign bit is set.
5431 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5436 // xyzw.parent = this;
// Constant constructors for UInt4. With fewer than four arguments the last
// argument is replicated into the remaining elements, as the parameter names
// (xyzw, yzw, zw) indicate. All of them forward to constant().
// Note that the parameters are declared as int, not unsigned int.
5439 UInt4::UInt4(int xyzw)
5441 constant(xyzw, xyzw, xyzw, xyzw);
5444 UInt4::UInt4(int x, int yzw)
5446 constant(x, yzw, yzw, yzw);
5449 UInt4::UInt4(int x, int y, int zw)
5451 constant(x, y, zw, zw);
5454 UInt4::UInt4(int x, int y, int z, int w)
5456 constant(x, y, z, w);
// Stores the constant vector (x, y, z, w) into this variable. The values are
// widened to int64_t, the element type createConstantVector takes here.
5459 void UInt4::constant(int x, int y, int z, int w)
5461 // xyzw.parent = this;
5463 int64_t constantVector[4] = {x, y, z, w};
5464 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Copy and conversion constructors for UInt4. The Int4 overloads take the
// source value as-is, with no conversion instruction visible here.
// NOTE(review): for the overloads that call loadValue(), the statement that
// stores `value` is not visible in this excerpt.
// From a UInt4 rvalue.
5467 UInt4::UInt4(RValue<UInt4> rhs)
5469 // xyzw.parent = this;
5471 storeValue(rhs.value);
// From another UInt4 variable.
5474 UInt4::UInt4(const UInt4 &rhs)
5476 // xyzw.parent = this;
5478 Value *value = rhs.loadValue();
// From a Reference<UInt4>.
5482 UInt4::UInt4(const Reference<UInt4> &rhs)
5484 // xyzw.parent = this;
5486 Value *value = rhs.loadValue();
// From an Int4 rvalue.
5490 UInt4::UInt4(RValue<Int4> rhs)
5492 // xyzw.parent = this;
5494 storeValue(rhs.value);
// From an Int4 variable.
5497 UInt4::UInt4(const Int4 &rhs)
5499 // xyzw.parent = this;
5501 Value *value = rhs.loadValue();
// From a Reference<Int4>.
5505 UInt4::UInt4(const Reference<Int4> &rhs)
5507 // xyzw.parent = this;
5509 Value *value = rhs.loadValue();
// Constructs a UInt4 from two UInt2 halves: lo becomes 64-bit element 0 and hi
// becomes 64-bit element 1 of a 2 x Long vector.
// NOTE(review): the final bitcast targets Int4::getType() rather than
// UInt4::getType(); confirm the two are interchangeable in the backend.
// NOTE(review): the statement that stores `uint4` is not visible in this excerpt.
5513 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5515 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5516 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5518 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5519 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5520 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5521 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
// Assigns an rvalue to this variable by storing its value.
// NOTE(review): the return statement is not visible in this excerpt.
5526 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs)
5528 storeValue(rhs.value);
// Assigns from another UInt4 variable; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5533 RValue<UInt4> UInt4::operator=(const UInt4 &rhs)
5535 Value *value = rhs.loadValue();
5538 return RValue<UInt4>(value);
// Assigns from a Reference<UInt4>; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5541 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs)
5543 Value *value = rhs.loadValue();
5546 return RValue<UInt4>(value);
// Element-wise binary operators for UInt4. Each emits the corresponding Nucleus
// instruction on the two operand values; division and remainder use the
// unsigned forms (createUDiv / createURem).
5549 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5551 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5554 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5556 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5559 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5561 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5564 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5566 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5569 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5571 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5574 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5576 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5579 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5581 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5584 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5586 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift operators for UInt4.
// The unsigned char overloads take one shift count, known when the routine is
// built, for all elements. The left shift reuses x86::pslld on the value
// reinterpreted as Int4; the right shift uses x86::psrld.
5589 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5591 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
5594 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5596 return x86::psrld(lhs, rhs);
// The UInt4 overloads take per-element shift counts and emit createShl and
// createLShr (logical right shift).
5599 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5601 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5604 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5606 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for UInt4. Each one evaluates the matching
// binary operator on the current value of lhs and on rhs, assigns the result
// back to lhs, and returns the result of that assignment.
// The /= and %= forms are commented out.
5609 RValue<UInt4> operator+=(UInt4 &lhs, RValue<UInt4> rhs)
5611 return lhs = lhs + rhs;
5614 RValue<UInt4> operator-=(UInt4 &lhs, RValue<UInt4> rhs)
5616 return lhs = lhs - rhs;
5619 RValue<UInt4> operator*=(UInt4 &lhs, RValue<UInt4> rhs)
5621 return lhs = lhs * rhs;
5624 // RValue<UInt4> operator/=(UInt4 &lhs, RValue<UInt4> rhs)
5626 // return lhs = lhs / rhs;
5629 // RValue<UInt4> operator%=(UInt4 &lhs, RValue<UInt4> rhs)
5631 // return lhs = lhs % rhs;
5634 RValue<UInt4> operator&=(UInt4 &lhs, RValue<UInt4> rhs)
5636 return lhs = lhs & rhs;
5639 RValue<UInt4> operator|=(UInt4 &lhs, RValue<UInt4> rhs)
5641 return lhs = lhs | rhs;
5644 RValue<UInt4> operator^=(UInt4 &lhs, RValue<UInt4> rhs)
5646 return lhs = lhs ^ rhs;
// Shift-assign forms take a shift count known when the routine is built.
5649 RValue<UInt4> operator<<=(UInt4 &lhs, unsigned char rhs)
5651 return lhs = lhs << rhs;
5654 RValue<UInt4> operator>>=(UInt4 &lhs, unsigned char rhs)
5656 return lhs = lhs >> rhs;
// Unary plus for UInt4.
// NOTE(review): the body is not visible in this excerpt.
5659 RValue<UInt4> operator+(RValue<UInt4> val)
// Unary minus: element-wise negation via Nucleus::createNeg.
5664 RValue<UInt4> operator-(RValue<UInt4> val)
5666 return RValue<UInt4>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
5669 RValue<UInt4> operator~(RValue<UInt4> val)
5671 return RValue<UInt4>(Nucleus::createNot(val.value));
// Unsigned element-wise comparisons for UInt4. Each returns a UInt4 mask built
// by sign-extending the comparison result to the Int4 vector type, so the
// intended element values are 0 (false) or ~0 (true).
// CmpEQ, CmpLE and CmpNLT work around the LLVM bug described in their FIXME
// comments: they emit the opposite comparison and invert the mask by XOR-ing
// with an all-ones constant.
// x == y, computed as NOT(x != y).
5674 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5676 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5677 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5678 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5679 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// x < y (unsigned).
5682 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5684 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
// x <= y (unsigned), computed as NOT(x > y).
5687 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5689 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5690 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5691 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5692 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// x != y.
5695 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5697 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// "Not less than": x >= y (unsigned), computed as NOT(x < y).
5700 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5702 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5703 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5704 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5705 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// "Not less than or equal": x > y (unsigned).
5708 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5710 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
// Element-wise unsigned maximum of two UInt4 values.
// With SSE4.1 this maps to x86::pmaxud. The other path builds a mask of the
// elements where x > y and selects x under the mask and y elsewhere; `&` binds
// tighter than `|`, so the expression reads (x & greater) | (y & ~greater).
5713 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5715 if(CPUID::supportsSSE4_1())
5717 return x86::pmaxud(x, y);
5721 RValue<UInt4> greater = CmpNLE(x, y);
5722 return x & greater | y & ~greater;
// Element-wise unsigned minimum of two UInt4 values.
// With SSE4.1 this maps to x86::pminud. The other path builds a mask of the
// elements where x < y and selects x under the mask and y elsewhere.
5726 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5728 if(CPUID::supportsSSE4_1())
5730 return x86::pminud(x, y);
5734 RValue<UInt4> less = CmpLT(x, y);
5735 return x & less | y & ~less;
// Packs two UInt4 values into one UShort8 using x86::packusdw.
// No CPU feature check is performed here; per the FIXME a fallback is still
// needed. NOTE(review): PACKUSDW is an SSE4.1 instruction - confirm callers
// only reach this on SSE4.1-capable hardware.
5739 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5741 return x86::packusdw(x, y); // FIXME: Fallback required
// Returns the backend type used to represent UInt4: a 4 x UInt vector type.
5744 Type *UInt4::getType()
5746 return T(VectorType::get(UInt::getType(), 4));
// Constructs a Float from an Int using a signed integer to floating-point
// conversion (Nucleus::createSIToFP).
5749 Float::Float(RValue<Int> cast)
5751 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5753 storeValue(integer);
// Constructs a Float holding the constant x.
5761 Float::Float(float x)
5763 storeValue(Nucleus::createConstantFloat(x));
// Constructs a Float variable initialized from an rvalue.
5766 Float::Float(RValue<Float> rhs)
5768 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs.
// NOTE(review): the statement that stores `value` is not visible in this excerpt.
5771 Float::Float(const Float &rhs)
5773 Value *value = rhs.loadValue();
// Constructs from a Reference<Float>: reads the referenced value.
// NOTE(review): the statement that stores `value` is not visible in this excerpt.
5777 Float::Float(const Reference<Float> &rhs)
5779 Value *value = rhs.loadValue();
// Assigns an rvalue to this variable by storing its value.
// NOTE(review): the return statement is not visible in this excerpt.
5783 RValue<Float> Float::operator=(RValue<Float> rhs)
5785 storeValue(rhs.value);
// Assigns from another Float variable; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5790 RValue<Float> Float::operator=(const Float &rhs)
5792 Value *value = rhs.loadValue();
5795 return RValue<Float>(value);
// Assigns from a Reference<Float>; returns the loaded value as an rvalue.
// NOTE(review): the store between the load and the return is not visible here.
5798 RValue<Float> Float::operator=(const Reference<Float> &rhs)
5800 Value *value = rhs.loadValue();
5803 return RValue<Float>(value);
// Scalar floating-point arithmetic operators. Each emits the corresponding
// Nucleus floating-point instruction (FAdd, FSub, FMul, FDiv).
5806 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5808 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5811 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5813 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5816 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5818 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5821 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5823 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
// Compound assignment operators for Float. Each one evaluates the matching
// binary operator on the current value of lhs and on rhs, assigns the result
// back to lhs, and returns the result of that assignment.
5826 RValue<Float> operator+=(Float &lhs, RValue<Float> rhs)
5828 return lhs = lhs + rhs;
5831 RValue<Float> operator-=(Float &lhs, RValue<Float> rhs)
5833 return lhs = lhs - rhs;
5836 RValue<Float> operator*=(Float &lhs, RValue<Float> rhs)
5838 return lhs = lhs * rhs;
5841 RValue<Float> operator/=(Float &lhs, RValue<Float> rhs)
5843 return lhs = lhs / rhs;
// Unary plus for Float.
// NOTE(review): the body is not visible in this excerpt.
5846 RValue<Float> operator+(RValue<Float> val)
// Unary minus: floating-point negation via Nucleus::createFNeg.
5851 RValue<Float> operator-(RValue<Float> val)
5853 return RValue<Float>(Nucleus::createFNeg(val.value));
// Scalar floating-point comparisons returning Bool. All of them use the ordered
// ("O") FCmp predicates, including != which uses createFCmpONE.
// NOTE(review): under LLVM fcmp semantics, ordered predicates are false when
// either operand is NaN, so NaN != NaN would be false here - confirm that is
// intended.
5856 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5858 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
5861 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
5863 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
5866 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
5868 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
5871 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
5873 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
5876 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
5878 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
5881 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
5883 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
// Absolute value: selects x when x > 0.0f and -x in every other case.
5886 RValue<Float> Abs(RValue<Float> x)
5888 return IfThenElse(x > 0.0f, x, -x);
// Maximum: selects x when x > y, else y.
5891 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
5893 return IfThenElse(x > y, x, y);
// Minimum: selects x when x < y, else y.
5896 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
5898 return IfThenElse(x < y, x, y);
// Approximate reciprocal of x based on x86::rcpss.
// Two forms are present: the plain rcpss result, and the rcpss result scaled by
// a correction factor.
// NOTE(review): the branch on exactAtPow2 that selects between them is not
// visible in this excerpt; presumably the corrected form is used when
// exactAtPow2 is true - confirm.
5901 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
5905 // rcpss uses a piecewise-linear approximation which minimizes the relative error
5906 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
// The factor 1 / rcp(1.0f) is computed on the host with the SSE intrinsics
// while the routine is being built, and is emitted as a Float constant.
5907 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
5911 return x86::rcpss(x);
// Approximate reciprocal square root of x, using x86::rsqrtss.
5915 RValue<Float> RcpSqrt_pp(RValue<Float> x)
5917 return x86::rsqrtss(x);
// Square root of x, using x86::sqrtss.
5920 RValue<Float> Sqrt(RValue<Float> x)
5922 return x86::sqrtss(x);
// Scalar rounding helpers. Each uses a scalar SSE4.1 wrapper when
// CPUID::supportsSSE4_1() reports true; the other return is a fallback.
// Rounds x using x86::roundss with mode 0.
// NOTE(review): presumably the mode is passed through as the ROUNDSS immediate,
// where 0 selects round-to-nearest-even - confirm against the x86 wrapper.
// Fallback: the x element of the Float4 overload of Round.
5925 RValue<Float> Round(RValue<Float> x)
5927 if(CPUID::supportsSSE4_1())
5929 return x86::roundss(x, 0);
5933 return Float4(Round(Float4(x))).x;
// Truncates x toward zero: x86::roundss with mode 3, or a round trip through
// Int as the fallback.
5937 RValue<Float> Trunc(RValue<Float> x)
5939 if(CPUID::supportsSSE4_1())
5941 return x86::roundss(x, 3);
5945 return Float(Int(x)); // Rounded toward zero
// Fractional part of x: x - floor(x) with SSE4.1, else the x element of the
// Float4 overload of Frac.
5949 RValue<Float> Frac(RValue<Float> x)
5951 if(CPUID::supportsSSE4_1())
5953 return x - x86::floorss(x);
5957 return Float4(Frac(Float4(x))).x;
// Floor of x: x86::floorss with SSE4.1, else the x element of the Float4
// overload of Floor.
5961 RValue<Float> Floor(RValue<Float> x)
5963 if(CPUID::supportsSSE4_1())
5965 return x86::floorss(x);
5969 return Float4(Floor(Float4(x))).x;
// Ceiling of x: x86::ceilss with SSE4.1, else the x element of the Float4
// overload of Ceil.
5973 RValue<Float> Ceil(RValue<Float> x)
5975 if(CPUID::supportsSSE4_1())
5977 return x86::ceilss(x);
5981 return Float4(Ceil(Float4(x))).x;
// Returns the backend type used to represent Float: LLVM's float type in the
// file-level LLVM context.
5985 Type *Float::getType()
5987 return T(llvm::Type::getFloatTy(*::context));
// Constructs a Float2 from the low 64 bits of a Float4: the Float4 is viewed as
// a 2 x Long vector, element 0 is extracted, and that 64-bit value is bitcast
// to the Float2 type.
// NOTE(review): the statement that stores `float2` is not visible in this excerpt.
5990 Float2::Float2(RValue<Float4> cast)
5992 // xyzw.parent = this;
5994 Value *int64x2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
5995 Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
5996 Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
// Returns the backend type used to represent Float2: a 2 x Float vector type.
6001 Type *Float2::getType()
6003 return T(VectorType::get(Float::getType(), 2));
// Constructs a Float4 from the four unsigned bytes of a Byte4.
// Three alternative implementations appear below, each defining `xyzw`.
// NOTE(review): the preprocessor lines that select one of them, and the final
// store, are not visible in this excerpt; presumably only one alternative is
// compiled - confirm against the full file.
6006 Float4::Float4(RValue<Byte4> cast)
// Alternative 1: direct vector unsigned-int-to-float conversion (marked as crashing).
6011 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: extract, convert and insert each of the four elements in turn.
6013 Value *vector = loadValue();
6015 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6016 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
6017 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6019 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6020 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
6021 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6023 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6024 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
6025 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6027 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6028 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
6029 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: widen to Int4 through the Int4(RValue<Byte4>) constructor,
// then do a signed int-to-float conversion on the widened values.
6031 Value *a = Int4(cast).loadValue();
6032 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Constructs a Float4 from the four signed bytes of an SByte4.
// Three alternative implementations appear below, each defining `xyzw`.
// NOTE(review): the preprocessor lines that select one of them, and the final
// store, are not visible in this excerpt; presumably only one alternative is
// compiled - confirm against the full file.
6038 Float4::Float4(RValue<SByte4> cast)
// Alternative 1: direct vector signed-int-to-float conversion (marked as crashing).
6043 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: extract, convert and insert each of the four elements in turn.
6045 Value *vector = loadValue();
6047 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6048 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
6049 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6051 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6052 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
6053 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6055 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6056 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
6057 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6059 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6060 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
6061 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: widen to Int4 through the Int4(RValue<SByte4>) constructor,
// then do a signed int-to-float conversion.
6063 Value *a = Int4(cast).loadValue();
6064 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Constructs a Float4 from a Short4 by a signed int-to-float conversion of an
// Int4 value `c`.
// NOTE(review): the declaration of `c` is not visible in this excerpt;
// presumably it is an Int4 built from `cast` - confirm.
6070 Float4::Float4(RValue<Short4> cast)
6075 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Constructs a Float4 from a UShort4 by a signed int-to-float conversion of an
// Int4 value `c`.
// NOTE(review): the declaration of `c` is not visible in this excerpt;
// presumably it is an Int4 built from `cast` - confirm.
6078 Float4::Float4(RValue<UShort4> cast)
6083 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Constructs a Float4 from an Int4 using a signed integer to floating-point
// conversion (Nucleus::createSIToFP).
// NOTE(review): the statement that stores `xyzw` is not visible in this excerpt.
6086 Float4::Float4(RValue<Int4> cast)
6090 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
// Constructs a Float4 from a UInt4 with an unsigned conversion built from a
// signed one. The low 31 bits of each element are converted as a non-negative
// Int. Then, for elements whose top bit was set, the float constant 2^31 is
// added: (As<Int4>(cast) >> 31) is an all-ones mask for those elements, and it
// is ANDed with the bit pattern of Float4(0x80000000u).
6095 Float4::Float4(RValue<UInt4> cast)
6099 RValue<Float4> result = Float4(Int4(cast & UInt4(0x7FFFFFFF))) +
6100 As<Float4>((As<Int4>(cast) >> 31) & As<Int4>(Float4(0x80000000u)));
6102 storeValue(result.value);
6110 Float4::Float4(float xyzw)
6112 constant(xyzw, xyzw, xyzw, xyzw);
6115 Float4::Float4(float x, float yzw)
6117 constant(x, yzw, yzw, yzw);
6120 Float4::Float4(float x, float y, float zw)
6122 constant(x, y, zw, zw);
6125 Float4::Float4(float x, float y, float z, float w)
6127 constant(x, y, z, w);
6130 void Float4::constant(float x, float y, float z, float w)
6134 double constantVector[4] = {x, y, z, w};
6135 storeValue(Nucleus::createConstantVector(constantVector, getType()));
6138 Float4::Float4(RValue<Float4> rhs)
6142 storeValue(rhs.value);
6145 Float4::Float4(const Float4 &rhs)
6149 Value *value = rhs.loadValue();
6153 Float4::Float4(const Reference<Float4> &rhs)
6157 Value *value = rhs.loadValue();
6161 Float4::Float4(RValue<Float> rhs)
6165 Value *vector = loadValue();
6166 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6168 int swizzle[4] = {0, 0, 0, 0};
6169 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
6171 storeValue(replicate);
6174 Float4::Float4(const Float &rhs)
6178 *this = RValue<Float>(rhs.loadValue());
6181 Float4::Float4(const Reference<Float> &rhs)
6185 *this = RValue<Float>(rhs.loadValue());
6188 RValue<Float4> Float4::operator=(float x)
6190 return *this = Float4(x, x, x, x);
6193 RValue<Float4> Float4::operator=(RValue<Float4> rhs)
6195 storeValue(rhs.value);
6200 RValue<Float4> Float4::operator=(const Float4 &rhs)
6202 Value *value = rhs.loadValue();
6205 return RValue<Float4>(value);
6208 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs)
6210 Value *value = rhs.loadValue();
6213 return RValue<Float4>(value);
6216 RValue<Float4> Float4::operator=(RValue<Float> rhs)
6218 return *this = Float4(rhs);
6221 RValue<Float4> Float4::operator=(const Float &rhs)
6223 return *this = Float4(rhs);
6226 RValue<Float4> Float4::operator=(const Reference<Float> &rhs)
6228 return *this = Float4(rhs);
6231 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6233 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6236 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6238 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6241 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6243 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6246 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6248 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6251 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6253 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6256 RValue<Float4> operator+=(Float4 &lhs, RValue<Float4> rhs)
6258 return lhs = lhs + rhs;
6261 RValue<Float4> operator-=(Float4 &lhs, RValue<Float4> rhs)
6263 return lhs = lhs - rhs;
6266 RValue<Float4> operator*=(Float4 &lhs, RValue<Float4> rhs)
6268 return lhs = lhs * rhs;
6271 RValue<Float4> operator/=(Float4 &lhs, RValue<Float4> rhs)
6273 return lhs = lhs / rhs;
6276 RValue<Float4> operator%=(Float4 &lhs, RValue<Float4> rhs)
6278 return lhs = lhs % rhs;
6281 RValue<Float4> operator+(RValue<Float4> val)
6286 RValue<Float4> operator-(RValue<Float4> val)
6288 return RValue<Float4>(Nucleus::createFNeg(val.value));
6291 RValue<Float4> Abs(RValue<Float4> x)
6293 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6294 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6295 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6297 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
6300 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6302 return x86::maxps(x, y);
6305 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6307 return x86::minps(x, y);
6310 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6314 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6315 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6316 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6320 return x86::rcpps(x);
6324 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6326 return x86::rsqrtps(x);
6329 RValue<Float4> Sqrt(RValue<Float4> x)
6331 return x86::sqrtps(x);
6334 RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i)
6336 return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i));
6339 RValue<Float> Extract(RValue<Float4> x, int i)
6341 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6344 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6346 return RValue<Float4>(createSwizzle4(x.value, select));
6349 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6353 ((imm >> 0) & 0x03) + 0,
6354 ((imm >> 2) & 0x03) + 0,
6355 ((imm >> 4) & 0x03) + 4,
6356 ((imm >> 6) & 0x03) + 4,
6359 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6362 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6364 int shuffle[4] = {0, 4, 1, 5};
6365 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6368 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6370 int shuffle[4] = {2, 6, 3, 7};
6371 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6374 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6376 Value *vector = lhs.loadValue();
6377 Value *shuffle = createMask4(vector, rhs.value, select);
6378 lhs.storeValue(shuffle);
6380 return RValue<Float4>(shuffle);
6383 RValue<Int> SignMask(RValue<Float4> x)
6385 return x86::movmskps(x);
6388 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6390 // return As<Int4>(x86::cmpeqps(x, y));
6391 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6394 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6396 // return As<Int4>(x86::cmpltps(x, y));
6397 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6400 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6402 // return As<Int4>(x86::cmpleps(x, y));
6403 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6406 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6408 // return As<Int4>(x86::cmpneqps(x, y));
6409 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6412 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6414 // return As<Int4>(x86::cmpnltps(x, y));
6415 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6418 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6420 // return As<Int4>(x86::cmpnleps(x, y));
6421 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6424 RValue<Float4> Round(RValue<Float4> x)
6426 if(CPUID::supportsSSE4_1())
6428 return x86::roundps(x, 0);
6432 return Float4(RoundInt(x));
6436 RValue<Float4> Trunc(RValue<Float4> x)
6438 if(CPUID::supportsSSE4_1())
6440 return x86::roundps(x, 3);
6444 return Float4(Int4(x)); // Rounded toward zero
6448 RValue<Float4> Frac(RValue<Float4> x)
6450 if(CPUID::supportsSSE4_1())
6452 return x - x86::floorps(x);
6456 Float4 frc = x - Float4(Int4(x)); // Signed fractional part
6458 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6462 RValue<Float4> Floor(RValue<Float4> x)
6464 if(CPUID::supportsSSE4_1())
6466 return x86::floorps(x);
6474 RValue<Float4> Ceil(RValue<Float4> x)
6476 if(CPUID::supportsSSE4_1())
6478 return x86::ceilps(x);
6486 Type *Float4::getType()
6488 return T(VectorType::get(Float::getType(), 4));
6491 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6493 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
6496 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6498 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6501 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6503 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6506 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, int offset)
6508 return lhs = lhs + offset;
6511 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<Int> offset)
6513 return lhs = lhs + offset;
6516 RValue<Pointer<Byte>> operator+=(Pointer<Byte> &lhs, RValue<UInt> offset)
6518 return lhs = lhs + offset;
6521 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6523 return lhs + -offset;
6526 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6528 return lhs + -offset;
6531 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6533 return lhs + -offset;
6536 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, int offset)
6538 return lhs = lhs - offset;
6541 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<Int> offset)
6543 return lhs = lhs - offset;
6546 RValue<Pointer<Byte>> operator-=(Pointer<Byte> &lhs, RValue<UInt> offset)
6548 return lhs = lhs - offset;
6553 Nucleus::createRetVoid();
6554 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6555 Nucleus::createUnreachable();
6558 void Return(RValue<Int> ret)
6560 Nucleus::createRet(ret.value);
6561 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6562 Nucleus::createUnreachable();
6565 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6567 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6568 Nucleus::setInsertBlock(bodyBB);
6573 RValue<Long> Ticks()
6575 llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter);
6577 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
6585 RValue<Int> cvtss2si(RValue<Float> val)
6587 llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si);
6592 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
6595 RValue<Int2> cvtps2pi(RValue<Float4> val)
6597 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi);
6599 return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
6602 RValue<Int2> cvttps2pi(RValue<Float4> val)
6604 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi);
6606 return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
6609 RValue<Int4> cvtps2dq(RValue<Float4> val)
6611 if(CPUID::supportsSSE2())
6613 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq);
6615 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
6619 Int2 lo = x86::cvtps2pi(val);
6620 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
6622 return Int4(lo, hi);
6626 RValue<Float> rcpss(RValue<Float> val)
6628 llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss);
6630 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6632 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
6635 RValue<Float> sqrtss(RValue<Float> val)
6637 llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss);
6639 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6641 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
6644 RValue<Float> rsqrtss(RValue<Float> val)
6646 llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss);
6648 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6650 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
6653 RValue<Float4> rcpps(RValue<Float4> val)
6655 llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps);
6657 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
6660 RValue<Float4> sqrtps(RValue<Float4> val)
6662 llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps);
6664 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
6667 RValue<Float4> rsqrtps(RValue<Float4> val)
6669 llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps);
6671 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
6674 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
6676 llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps);
6678 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
6681 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
6683 llvm::Function *minps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
6685 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
6688 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
6690 llvm::Function *roundss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);
6692 Value *undef = V(UndefValue::get(Float4::getType()));
6693 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
6695 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
6698 RValue<Float> floorss(RValue<Float> val)
6700 return roundss(val, 1);
6703 RValue<Float> ceilss(RValue<Float> val)
6705 return roundss(val, 2);
6708 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
6710 llvm::Function *roundps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);
6712 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
6715 RValue<Float4> floorps(RValue<Float4> val)
6717 return roundps(val, 1);
6720 RValue<Float4> ceilps(RValue<Float4> val)
6722 return roundps(val, 2);
6725 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6727 llvm::Function *cmpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);
6729 return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
6732 RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
6734 return cmpps(x, y, 0);
6737 RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
6739 return cmpps(x, y, 1);
6742 RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
6744 return cmpps(x, y, 2);
6747 RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
6749 return cmpps(x, y, 3);
6752 RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
6754 return cmpps(x, y, 4);
6757 RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
6759 return cmpps(x, y, 5);
6762 RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
6764 return cmpps(x, y, 6);
6767 RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
6769 return cmpps(x, y, 7);
6772 RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
6774 llvm::Function *cmpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);
6776 Value *vector1 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
6777 Value *vector2 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);
6779 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), Float::getType(), 0));
6782 RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
6784 return cmpss(x, y, 0);
6787 RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
6789 return cmpss(x, y, 1);
6792 RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
6794 return cmpss(x, y, 2);
6797 RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
6799 return cmpss(x, y, 3);
6802 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
6804 return cmpss(x, y, 4);
6807 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
6809 return cmpss(x, y, 5);
6812 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
6814 return cmpss(x, y, 6);
6817 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
6819 return cmpss(x, y, 7);
6822 RValue<Int4> pabsd(RValue<Int4> x)
6824 llvm::Function *pabsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
6826 return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
6829 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
6831 llvm::Function *paddsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
6833 return As<Short4>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
6836 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
6838 llvm::Function *psubsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
6840 return As<Short4>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
6843 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
6845 llvm::Function *paddusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
6847 return As<UShort4>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
6850 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
6852 llvm::Function *psubusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
6854 return As<UShort4>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
6857 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
6859 llvm::Function *paddsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
6861 return As<SByte8>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
6864 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
6866 llvm::Function *psubsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
6868 return As<SByte8>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
6871 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
6873 llvm::Function *paddusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
6875 return As<Byte8>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
6878 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
6880 llvm::Function *psubusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
6882 return As<Byte8>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
6885 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
6887 llvm::Function *paddw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
6889 return As<Short4>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value)));
6892 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
6894 llvm::Function *psubw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
6896 return As<Short4>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value)));
6899 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
6901 llvm::Function *pmullw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
6903 return As<Short4>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
6906 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
6908 llvm::Function *pand = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
6910 return As<Short4>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value)));
6913 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
6915 llvm::Function *por = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
6917 return As<Short4>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value)));
6920 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
6922 llvm::Function *pxor = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
6924 return As<Short4>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value)));
6927 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
6929 llvm::Function *pshufw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
6931 return As<Short4>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y)))));
6934 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
6936 llvm::Function *punpcklwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
6938 return As<Int2>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
6941 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
6943 llvm::Function *punpckhwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
6945 return As<Int2>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
6948 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
6950 llvm::Function *pinsrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
6952 return As<Short4>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i)))));
6955 RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
6957 llvm::Function *pextrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
6959 return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
6962 RValue<Short4> punpckldq(RValue<Int2> x, RValue<Int2> y)
6964 llvm::Function *punpckldq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
6966 return As<Short4>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
6969 RValue<Short4> punpckhdq(RValue<Int2> x, RValue<Int2> y)
6971 llvm::Function *punpckhdq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
6973 return As<Short4>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
6976 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
6978 llvm::Function *punpcklbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
6980 return As<Short4>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
6983 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
6985 llvm::Function *punpckhbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
6987 return As<Short4>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
6990 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
6992 llvm::Function *paddb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
6994 return As<Byte8>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value)));
6997 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
6999 llvm::Function *psubb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
7001 return As<Byte8>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value)));
7004 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
7006 llvm::Function *paddd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
7008 return As<Int2>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value)));
7011 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
7013 llvm::Function *psubd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
7015 return As<Int2>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value)));
7018 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7020 llvm::Function *pavgw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
7022 return As<UShort4>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
7025 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7027 llvm::Function *pmaxsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
7029 return As<Short4>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
7032 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7034 llvm::Function *pminsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
7036 return As<Short4>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value)));
7039 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7041 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
7043 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
7046 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7048 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
7050 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
7053 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7055 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
7057 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
7060 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7062 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
7064 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
7067 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7069 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
7071 return As<Short4>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
7074 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7076 if(CPUID::supportsSSE2())
7078 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
7080 return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
7085 Int2 hiX = Int2(Swizzle(x, 0xEE));
7088 Int2 hiY = Int2(Swizzle(y, 0xEE));
7090 Short4 lo = x86::packssdw(loX, hiX);
7091 Short4 hi = x86::packssdw(loY, hiY);
7093 return Short8(lo, hi);
7097 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7099 llvm::Function *packsswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
7101 return As<SByte8>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
7104 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
7106 llvm::Function *packuswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
7108 return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
7111 RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
7113 if(CPUID::supportsSSE4_1())
7115 llvm::Function *packusdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
7117 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
7121 // FIXME: Not an exact replacement!
7122 return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
7126 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7128 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
7130 return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7133 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7135 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
7137 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
7140 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7142 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
7144 return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7147 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7149 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
7151 return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
7154 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7156 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
7158 return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7161 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7163 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
7165 return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
7168 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7170 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d);
7172 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7175 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7177 if(CPUID::supportsSSE2())
7179 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d);
7181 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
7186 Int2 hi = Int2(Swizzle(x, 0xEE));
7188 lo = x86::pslld(lo, y);
7189 hi = x86::pslld(hi, y);
7191 return Int4(lo, hi);
7195 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7197 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d);
7199 return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7202 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7204 if(CPUID::supportsSSE2())
7206 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d);
7208 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
7213 Int2 hi = Int2(Swizzle(x, 0xEE));
7215 lo = x86::psrad(lo, y);
7216 hi = x86::psrad(hi, y);
7218 return Int4(lo, hi);
7222 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7224 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d);
7226 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7229 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7231 if(CPUID::supportsSSE2())
7233 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d);
7235 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
7239 UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7240 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7242 lo = x86::psrld(lo, y);
7243 hi = x86::psrld(hi, y);
7245 return UInt4(lo, hi);
7249 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7251 llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd);
7253 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
7256 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7258 llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd);
7260 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
7263 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7265 llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud);
7267 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
7270 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7272 llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud);
7274 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
7277 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7279 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w);
7281 return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
7284 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7286 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w);
7288 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
7291 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7293 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd);
7295 return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
7298 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7300 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w);
7302 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
7305 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7307 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w);
7309 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
7312 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7314 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd);
7316 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
7319 RValue<Int> movmskps(RValue<Float4> x)
7321 llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps);
7323 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
7326 RValue<Int> pmovmskb(RValue<Byte8> x)
7328 llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb);
7330 return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value)));
7333 //RValue<Int2> movd(RValue<Pointer<Int>> x)
7335 // Value *element = Nucleus::createLoad(x.value);
7337 //// Value *int2 = UndefValue::get(Int2::getType());
7338 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7340 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7342 // return RValue<Int2>(int2);
7345 //RValue<Int2> movdq2q(RValue<Int4> x)
7347 // Value *long2 = Nucleus::createBitCast(x.value, T(VectorType::get(Long::getType(), 2)));
7348 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7350 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7353 RValue<Int4> pmovzxbd(RValue<Int4> x)
7355 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd);
7357 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))));
7360 RValue<Int4> pmovsxbd(RValue<Int4> x)
7362 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd);
7364 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))));
7367 RValue<Int4> pmovzxwd(RValue<Int4> x)
7369 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd);
7371 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))));
7374 RValue<Int4> pmovsxwd(RValue<Int4> x)
7376 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd);
7378 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))));
7383 llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms);
7385 V(::builder->CreateCall(emms));