1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Nucleus.hpp"
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
32 #include "LLVMRoutine.hpp"
33 #include "LLVMRoutineManager.hpp"
38 #include "MutexLock.hpp"
40 #include <xmmintrin.h>
43 #if defined(__x86_64__) && defined(_WIN32)
44 extern "C" void X86CompilationCallback()
46 assert(false); // UNIMPLEMENTED
52 bool (*CodeAnalystInitialize)() = 0;
53 void (*CodeAnalystCompleteJITLog)() = 0;
54 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;
59 extern bool JITEmitDebugInfo;
64 sw::LLVMRoutineManager *routineManager = nullptr;
65 llvm::ExecutionEngine *executionEngine = nullptr;
66 llvm::IRBuilder<> *builder = nullptr;
67 llvm::LLVMContext *context = nullptr;
68 llvm::Module *module = nullptr;
69 llvm::Function *function = nullptr;
71 sw::BackoffLock codegenMutex;
78 Optimization optimization[10] = {InstructionCombining, Disabled};
80 class Type : public llvm::Type {};
81 class Value : public llvm::Value {};
82 class Constant : public llvm::Constant {};
83 class BasicBlock : public llvm::BasicBlock {};
85 inline Type *T(llvm::Type *t)
87 return reinterpret_cast<Type*>(t);
90 inline Value *V(llvm::Value *t)
92 return reinterpret_cast<Value*>(t);
95 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
97 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
100 inline Constant *C(llvm::Constant *c)
102 return reinterpret_cast<Constant*>(c);
105 inline BasicBlock *B(llvm::BasicBlock *t)
107 return reinterpret_cast<BasicBlock*>(t);
112 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
114 InitializeNativeTarget();
115 JITEmitDebugInfo = false;
119 ::context = new LLVMContext();
122 ::module = new Module("", *::context);
123 ::routineManager = new LLVMRoutineManager();
125 #if defined(__x86_64__)
126 const char *architecture = "x86-64";
128 const char *architecture = "x86";
131 SmallVector<std::string, 1> MAttrs;
132 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
133 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
134 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
135 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
136 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
137 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
138 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
141 TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
142 ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine);
146 ::builder = new IRBuilder<>(*::context);
149 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
152 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
153 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
154 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
156 CodeAnalystInitialize();
164 delete ::executionEngine;
165 ::executionEngine = nullptr;
167 ::routineManager = nullptr;
168 ::function = nullptr;
171 ::codegenMutex.unlock();
174 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
176 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
178 llvm::Type *type = ::function->getReturnType();
186 createRet(V(UndefValue::get(type)));
193 raw_fd_ostream file("llvm-dump-unopt.txt", error);
194 ::module->print(file, 0);
205 raw_fd_ostream file("llvm-dump-opt.txt", error);
206 ::module->print(file, 0);
209 void *entry = ::executionEngine->getPointerToFunction(::function);
210 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
212 if(CodeAnalystLogJITCode)
214 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
// Runs LLVM optimization passes over the current module (::module).
// NOTE(review): several lines of this function are not visible in this view
// (e.g. whatever guards the one-time PassManager construction), so the comments
// below describe only the statements that are shown.
220 void Nucleus::optimize()
// Function-local static: the pass manager pointer persists across calls.
222 static PassManager *passManager = nullptr;
226 passManager = new PassManager();
229 // NoInfsFPMath = true;
230 // NoNaNsFPMath = true;
// Target data layout and scalar replacement of aggregates are always added,
// ahead of the configurable passes.
232 passManager->add(new TargetData(*::executionEngine->getTargetData()));
233 passManager->add(createScalarReplAggregatesPass());
// Walk the optimization[] table (at most 10 entries); the first Disabled entry ends the list.
235 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
237 switch(optimization[pass])
239 case Disabled: break;
240 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
241 case LICM: passManager->add(createLICMPass()); break;
242 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
243 case GVN: passManager->add(createGVNPass()); break;
244 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
245 case Reassociate: passManager->add(createReassociatePass()); break;
246 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
247 case SCCP: passManager->add(createSCCPPass()); break;
248 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
// Execute the assembled pipeline on the module being built.
255 passManager->run(*::module);
// Creates an alloca of 'type' at the front of the current function's entry block
// and returns it as a Reactor Value.
// NOTE(review): the condition selecting between the two allocations below is not
// visible in this view; presumably it tests arraySize — confirm.
258 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
260 // Need to allocate it in the entry block for mem2reg to work
261 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
263 Instruction *declaration;
// Array form: the element count is passed as a 32-bit integer constant.
267 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
// Scalar form: a null array-size operand.
271 declaration = new AllocaInst(type, (Value*)0);
// Inserted at the very front of the entry block, independent of the builder's insert point.
274 entryBlock.getInstList().push_front(declaration);
276 return V(declaration);
279 BasicBlock *Nucleus::createBasicBlock()
281 return B(BasicBlock::Create(*::context, "", ::function));
284 BasicBlock *Nucleus::getInsertBlock()
286 return B(::builder->GetInsertBlock());
289 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
291 // assert(::builder->GetInsertBlock()->back().isTerminator());
292 return ::builder->SetInsertPoint(basicBlock);
295 BasicBlock *Nucleus::getPredecessor(BasicBlock *basicBlock)
297 return B(*pred_begin(basicBlock));
300 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
302 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false);
303 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
304 ::function->setCallingConv(llvm::CallingConv::C);
306 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
309 Value *Nucleus::getArgument(unsigned int index)
311 llvm::Function::arg_iterator args = ::function->arg_begin();
322 void Nucleus::createRetVoid()
326 ::builder->CreateRetVoid();
329 void Nucleus::createRet(Value *v)
333 ::builder->CreateRet(v);
336 void Nucleus::createBr(BasicBlock *dest)
338 ::builder->CreateBr(dest);
341 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
343 ::builder->CreateCondBr(cond, ifTrue, ifFalse);
346 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
348 return V(::builder->CreateAdd(lhs, rhs));
351 Value *Nucleus::createSub(Value *lhs, Value *rhs)
353 return V(::builder->CreateSub(lhs, rhs));
356 Value *Nucleus::createMul(Value *lhs, Value *rhs)
358 return V(::builder->CreateMul(lhs, rhs));
361 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
363 return V(::builder->CreateUDiv(lhs, rhs));
366 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
368 return V(::builder->CreateSDiv(lhs, rhs));
371 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
373 return V(::builder->CreateFAdd(lhs, rhs));
376 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
378 return V(::builder->CreateFSub(lhs, rhs));
381 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
383 return V(::builder->CreateFMul(lhs, rhs));
386 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
388 return V(::builder->CreateFDiv(lhs, rhs));
391 Value *Nucleus::createURem(Value *lhs, Value *rhs)
393 return V(::builder->CreateURem(lhs, rhs));
396 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
398 return V(::builder->CreateSRem(lhs, rhs));
401 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
403 return V(::builder->CreateFRem(lhs, rhs));
406 Value *Nucleus::createShl(Value *lhs, Value *rhs)
408 return V(::builder->CreateShl(lhs, rhs));
411 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
413 return V(::builder->CreateLShr(lhs, rhs));
416 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
418 return V(::builder->CreateAShr(lhs, rhs));
421 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
423 return V(::builder->CreateAnd(lhs, rhs));
426 Value *Nucleus::createOr(Value *lhs, Value *rhs)
428 return V(::builder->CreateOr(lhs, rhs));
431 Value *Nucleus::createXor(Value *lhs, Value *rhs)
433 return V(::builder->CreateXor(lhs, rhs));
436 Value *Nucleus::createAssign(Constant *constant)
441 Value *Nucleus::createNeg(Value *v)
443 return V(::builder->CreateNeg(v));
446 Value *Nucleus::createFNeg(Value *v)
448 return V(::builder->CreateFNeg(v));
451 Value *Nucleus::createNot(Value *v)
453 return V(::builder->CreateNot(v));
456 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
458 assert(ptr->getType()->getContainedType(0) == type);
459 return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align)));
462 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
464 assert(ptr->getType()->getContainedType(0) == type);
465 ::builder->Insert(new StoreInst(value, ptr, isVolatile, align));
469 Constant *Nucleus::createStore(Constant *constant, Value *ptr, Type *type, bool isVolatile, unsigned int align)
471 assert(ptr->getType()->getContainedType(0) == type);
472 ::builder->Insert(new StoreInst(constant, ptr, isVolatile, align));
476 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
478 assert(ptr->getType()->getContainedType(0) == type);
479 return V(::builder->CreateGEP(ptr, index));
482 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
484 return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent));
487 Value *Nucleus::createTrunc(Value *v, Type *destType)
489 return V(::builder->CreateTrunc(v, destType));
492 Value *Nucleus::createZExt(Value *v, Type *destType)
494 return V(::builder->CreateZExt(v, destType));
497 Value *Nucleus::createSExt(Value *v, Type *destType)
499 return V(::builder->CreateSExt(v, destType));
502 Value *Nucleus::createFPToSI(Value *v, Type *destType)
504 return V(::builder->CreateFPToSI(v, destType));
507 Value *Nucleus::createUIToFP(Value *v, Type *destType)
509 return V(::builder->CreateUIToFP(v, destType));
512 Value *Nucleus::createSIToFP(Value *v, Type *destType)
514 return V(::builder->CreateSIToFP(v, destType));
517 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
519 return V(::builder->CreateFPTrunc(v, destType));
522 Value *Nucleus::createFPExt(Value *v, Type *destType)
524 return V(::builder->CreateFPExt(v, destType));
527 Value *Nucleus::createPtrToInt(Value *v, Type *destType)
529 return V(::builder->CreatePtrToInt(v, destType));
532 Value *Nucleus::createIntToPtr(Value *v, Type *destType)
534 return V(::builder->CreateIntToPtr(v, destType));
537 Value *Nucleus::createBitCast(Value *v, Type *destType)
539 return V(::builder->CreateBitCast(v, destType));
542 Value *Nucleus::createIntCast(Value *v, Type *destType, bool isSigned)
544 return V(::builder->CreateIntCast(v, destType, isSigned));
547 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
549 return V(::builder->CreateICmpEQ(lhs, rhs));
552 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
554 return V(::builder->CreateICmpNE(lhs, rhs));
557 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
559 return V(::builder->CreateICmpUGT(lhs, rhs));
562 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
564 return V(::builder->CreateICmpUGE(lhs, rhs));
567 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
569 return V(::builder->CreateICmpULT(lhs, rhs));
572 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
574 return V(::builder->CreateICmpULE(lhs, rhs));
577 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
579 return V(::builder->CreateICmpSGT(lhs, rhs));
582 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
584 return V(::builder->CreateICmpSGE(lhs, rhs));
587 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
589 return V(::builder->CreateICmpSLT(lhs, rhs));
592 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
594 return V(::builder->CreateICmpSLE(lhs, rhs));
597 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
599 return V(::builder->CreateFCmpOEQ(lhs, rhs));
602 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
604 return V(::builder->CreateFCmpOGT(lhs, rhs));
607 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
609 return V(::builder->CreateFCmpOGE(lhs, rhs));
612 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
614 return V(::builder->CreateFCmpOLT(lhs, rhs));
617 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
619 return V(::builder->CreateFCmpOLE(lhs, rhs));
622 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
624 return V(::builder->CreateFCmpONE(lhs, rhs));
627 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
629 return V(::builder->CreateFCmpORD(lhs, rhs));
632 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
634 return V(::builder->CreateFCmpUNO(lhs, rhs));
637 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
639 return V(::builder->CreateFCmpUEQ(lhs, rhs));
642 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
644 return V(::builder->CreateFCmpUGT(lhs, rhs));
647 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
649 return V(::builder->CreateFCmpUGE(lhs, rhs));
652 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
654 return V(::builder->CreateFCmpULT(lhs, rhs));
657 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
659 return V(::builder->CreateFCmpULE(lhs, rhs));
662 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
664 return V(::builder->CreateFCmpULE(lhs, rhs));
667 Value *Nucleus::createExtractElement(Value *vector, int index)
669 return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
672 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
674 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
677 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, Value *mask)
679 return V(::builder->CreateShuffleVector(V1, V2, mask));
682 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
684 return V(::builder->CreateSelect(C, ifTrue, ifFalse));
687 Value *Nucleus::createSwitch(Value *v, BasicBlock *Dest, unsigned NumCases)
689 return V(::builder->CreateSwitch(v, Dest, NumCases));
692 void Nucleus::addSwitchCase(Value *Switch, int Case, BasicBlock *Branch)
694 reinterpret_cast<SwitchInst*>(Switch)->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), Case, true), Branch);
697 void Nucleus::createUnreachable()
699 ::builder->CreateUnreachable();
// Builds a 4-lane shuffle of 'val' in which output lane i reads the source lane
// given by bits [2i+1 : 2i] of 'select'.
// NOTE(review): the return statement is not visible in this view; presumably the
// shuffle result is returned — confirm.
702 Value *Nucleus::createSwizzle(Value *val, unsigned char select)
// One 32-bit constant index per output lane, each a 2-bit field of 'select'.
704 Constant *swizzle[4];
705 swizzle[0] = Nucleus::createConstantInt((select >> 0) & 0x03);
706 swizzle[1] = Nucleus::createConstantInt((select >> 2) & 0x03);
707 swizzle[2] = Nucleus::createConstantInt((select >> 4) & 0x03);
708 swizzle[3] = Nucleus::createConstantInt((select >> 6) & 0x03);
// The second shuffle operand is undef: all indices are in 0..3, so only 'val' is read.
710 Value *shuffle = Nucleus::createShuffleVector(val, V(UndefValue::get(val->getType())), V(Nucleus::createConstantVector(swizzle, 4)));
// Blends two 4-lane vectors with a shuffle: every lane whose index appears in one
// of the four 2-bit fields of 'select' is taken from rhs, all other lanes from lhs.
// NOTE(review): the return statement is not visible in this view; presumably the
// shuffle result is returned — confirm.
715 Value *Nucleus::createMask(Value *lhs, Value *rhs, unsigned char select)
// mask[i] is set when lane i is named by any 2-bit field of 'select'.
717 bool mask[4] = {false, false, false, false};
719 mask[(select >> 0) & 0x03] = true;
720 mask[(select >> 2) & 0x03] = true;
721 mask[(select >> 4) & 0x03] = true;
722 mask[(select >> 6) & 0x03] = true;
// Shuffle indices 0-3 address lanes of the first operand (lhs) and 4-7 lanes of
// the second operand (rhs), so lane i picks rhs[i] when masked and lhs[i] otherwise.
724 Constant *swizzle[4];
725 swizzle[0] = Nucleus::createConstantInt(mask[0] ? 4 : 0);
726 swizzle[1] = Nucleus::createConstantInt(mask[1] ? 5 : 1);
727 swizzle[2] = Nucleus::createConstantInt(mask[2] ? 6 : 2);
728 swizzle[3] = Nucleus::createConstantInt(mask[3] ? 7 : 3);
730 Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, V(Nucleus::createConstantVector(swizzle, 4)));
735 Constant *Nucleus::createConstantPointer(const void *address, Type *Ty, bool isConstant, unsigned int Align)
737 const GlobalValue *existingGlobal = ::executionEngine->getGlobalValueAtAddress(const_cast<void*>(address)); // FIXME: Const
741 return (Constant*)existingGlobal;
744 llvm::GlobalValue *global = new llvm::GlobalVariable(*::module, Ty, isConstant, llvm::GlobalValue::ExternalLinkage, 0, "");
746 global->setAlignment(Align);
748 ::executionEngine->addGlobalMapping(global, const_cast<void*>(address));
753 Type *Nucleus::getPointerType(Type *ElementType)
755 return T(llvm::PointerType::get(ElementType, 0));
758 Constant *Nucleus::createNullValue(Type *Ty)
760 return C(llvm::Constant::getNullValue(Ty));
763 Constant *Nucleus::createConstantInt(int64_t i)
765 return C(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true));
768 Constant *Nucleus::createConstantInt(int i)
770 return C(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true));
773 Constant *Nucleus::createConstantInt(unsigned int i)
775 return C(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false));
778 Constant *Nucleus::createConstantBool(bool b)
780 return C(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b));
783 Constant *Nucleus::createConstantByte(signed char i)
785 return C(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true));
788 Constant *Nucleus::createConstantByte(unsigned char i)
790 return C(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false));
793 Constant *Nucleus::createConstantShort(short i)
795 return C(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true));
798 Constant *Nucleus::createConstantShort(unsigned short i)
800 return C(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false));
803 Constant *Nucleus::createConstantFloat(float x)
805 return C(ConstantFP::get(Float::getType(), x));
808 Constant *Nucleus::createNullPointer(Type *Ty)
810 return C(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)));
813 Constant *Nucleus::createConstantVector(Constant *const *Vals, unsigned NumVals)
815 return C(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(reinterpret_cast<llvm::Constant *const*>(Vals), NumVals)));
818 Type *Void::getType()
820 return T(llvm::Type::getVoidTy(*::context));
823 class MMX : public Variable<MMX>
826 static Type *getType();
831 return T(llvm::Type::getX86_MMXTy(*::context));
834 Bool::Bool(Argument<Bool> argument)
836 storeValue(argument.value);
845 storeValue(Nucleus::createConstantBool(x));
848 Bool::Bool(RValue<Bool> rhs)
850 storeValue(rhs.value);
853 Bool::Bool(const Bool &rhs)
855 Value *value = rhs.loadValue();
859 Bool::Bool(const Reference<Bool> &rhs)
861 Value *value = rhs.loadValue();
865 RValue<Bool> Bool::operator=(RValue<Bool> rhs) const
867 storeValue(rhs.value);
872 RValue<Bool> Bool::operator=(const Bool &rhs) const
874 Value *value = rhs.loadValue();
877 return RValue<Bool>(value);
880 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const
882 Value *value = rhs.loadValue();
885 return RValue<Bool>(value);
888 RValue<Bool> operator!(RValue<Bool> val)
890 return RValue<Bool>(Nucleus::createNot(val.value));
893 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
895 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
898 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
900 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
903 Type *Bool::getType()
905 return T(llvm::Type::getInt1Ty(*::context));
908 Byte::Byte(Argument<Byte> argument)
910 storeValue(argument.value);
913 Byte::Byte(RValue<Int> cast)
915 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
920 Byte::Byte(RValue<UInt> cast)
922 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
927 Byte::Byte(RValue<UShort> cast)
929 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
940 storeValue(Nucleus::createConstantByte((unsigned char)x));
943 Byte::Byte(unsigned char x)
945 storeValue(Nucleus::createConstantByte(x));
948 Byte::Byte(RValue<Byte> rhs)
950 storeValue(rhs.value);
953 Byte::Byte(const Byte &rhs)
955 Value *value = rhs.loadValue();
959 Byte::Byte(const Reference<Byte> &rhs)
961 Value *value = rhs.loadValue();
965 RValue<Byte> Byte::operator=(RValue<Byte> rhs) const
967 storeValue(rhs.value);
972 RValue<Byte> Byte::operator=(const Byte &rhs) const
974 Value *value = rhs.loadValue();
977 return RValue<Byte>(value);
980 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const
982 Value *value = rhs.loadValue();
985 return RValue<Byte>(value);
988 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
990 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
993 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
995 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
998 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1000 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1003 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1005 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1008 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1010 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1013 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1015 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1018 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1020 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1023 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1025 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1028 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1030 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1033 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1035 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
1038 RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs)
1040 return lhs = lhs + rhs;
1043 RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs)
1045 return lhs = lhs - rhs;
1048 RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs)
1050 return lhs = lhs * rhs;
1053 RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs)
1055 return lhs = lhs / rhs;
1058 RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs)
1060 return lhs = lhs % rhs;
1063 RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs)
1065 return lhs = lhs & rhs;
1068 RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs)
1070 return lhs = lhs | rhs;
1073 RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs)
1075 return lhs = lhs ^ rhs;
1078 RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs)
1080 return lhs = lhs << rhs;
1083 RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs)
1085 return lhs = lhs >> rhs;
1088 RValue<Byte> operator+(RValue<Byte> val)
1093 RValue<Byte> operator-(RValue<Byte> val)
1095 return RValue<Byte>(Nucleus::createNeg(val.value));
1098 RValue<Byte> operator~(RValue<Byte> val)
1100 return RValue<Byte>(Nucleus::createNot(val.value));
1103 RValue<Byte> operator++(const Byte &val, int) // Post-increment
1105 RValue<Byte> res = val;
1107 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1108 val.storeValue(inc);
1113 const Byte &operator++(const Byte &val) // Pre-increment
1115 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1116 val.storeValue(inc);
1121 RValue<Byte> operator--(const Byte &val, int) // Post-decrement
1123 RValue<Byte> res = val;
1125 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1126 val.storeValue(inc);
1131 const Byte &operator--(const Byte &val) // Pre-decrement
1133 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1134 val.storeValue(inc);
1139 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1141 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1144 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1146 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1149 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1151 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1154 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1156 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1159 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1161 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1164 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1166 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1169 Type *Byte::getType()
1171 return T(llvm::Type::getInt8Ty(*::context));
1174 SByte::SByte(Argument<SByte> argument)
1176 storeValue(argument.value);
1179 SByte::SByte(RValue<Int> cast)
1181 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1183 storeValue(integer);
1186 SByte::SByte(RValue<Short> cast)
1188 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1190 storeValue(integer);
1197 SByte::SByte(signed char x)
1199 storeValue(Nucleus::createConstantByte(x));
1202 SByte::SByte(RValue<SByte> rhs)
1204 storeValue(rhs.value);
1207 SByte::SByte(const SByte &rhs)
1209 Value *value = rhs.loadValue();
1213 SByte::SByte(const Reference<SByte> &rhs)
1215 Value *value = rhs.loadValue();
1219 RValue<SByte> SByte::operator=(RValue<SByte> rhs) const
1221 storeValue(rhs.value);
1226 RValue<SByte> SByte::operator=(const SByte &rhs) const
1228 Value *value = rhs.loadValue();
1231 return RValue<SByte>(value);
1234 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const
1236 Value *value = rhs.loadValue();
1239 return RValue<SByte>(value);
1242 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1244 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1247 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1249 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1252 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1254 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1257 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1259 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1262 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1264 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1267 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1269 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1272 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1274 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1277 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1279 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1282 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1284 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1287 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1289 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
1292 RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs)
1294 return lhs = lhs + rhs;
1297 RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs)
1299 return lhs = lhs - rhs;
1302 RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs)
1304 return lhs = lhs * rhs;
1307 RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs)
1309 return lhs = lhs / rhs;
1312 RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs)
1314 return lhs = lhs % rhs;
1317 RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs)
1319 return lhs = lhs & rhs;
1322 RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs)
1324 return lhs = lhs | rhs;
1327 RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs)
1329 return lhs = lhs ^ rhs;
1332 RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs)
1334 return lhs = lhs << rhs;
1337 RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs)
1339 return lhs = lhs >> rhs;
1342 RValue<SByte> operator+(RValue<SByte> val)
1347 RValue<SByte> operator-(RValue<SByte> val)
1349 return RValue<SByte>(Nucleus::createNeg(val.value));
1352 RValue<SByte> operator~(RValue<SByte> val)
1354 return RValue<SByte>(Nucleus::createNot(val.value));
1357 RValue<SByte> operator++(const SByte &val, int) // Post-increment
1359 RValue<SByte> res = val;
1361 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1362 val.storeValue(inc);
1367 const SByte &operator++(const SByte &val) // Pre-increment
1369 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1370 val.storeValue(inc);
1375 RValue<SByte> operator--(const SByte &val, int) // Post-decrement
1377 RValue<SByte> res = val;
1379 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1380 val.storeValue(inc);
1385 const SByte &operator--(const SByte &val) // Pre-decrement
1387 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1388 val.storeValue(inc);
1393 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1395 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1398 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1400 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1403 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1405 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1408 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1410 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1413 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1415 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1418 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1420 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1423 Type *SByte::getType()
1425 return T(llvm::Type::getInt8Ty(*::context));
1428 Short::Short(Argument<Short> argument)
1430 storeValue(argument.value);
1433 Short::Short(RValue<Int> cast)
1435 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1437 storeValue(integer);
1444 Short::Short(short x)
1446 storeValue(Nucleus::createConstantShort(x));
1449 Short::Short(RValue<Short> rhs)
1451 storeValue(rhs.value);
1454 Short::Short(const Short &rhs)
1456 Value *value = rhs.loadValue();
1460 Short::Short(const Reference<Short> &rhs)
1462 Value *value = rhs.loadValue();
1466 RValue<Short> Short::operator=(RValue<Short> rhs) const
1468 storeValue(rhs.value);
1473 RValue<Short> Short::operator=(const Short &rhs) const
1475 Value *value = rhs.loadValue();
1478 return RValue<Short>(value);
1481 RValue<Short> Short::operator=(const Reference<Short> &rhs) const
1483 Value *value = rhs.loadValue();
1486 return RValue<Short>(value);
1489 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1491 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1494 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1496 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1499 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1501 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
1504 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1506 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1509 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1511 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1514 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1516 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1519 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1521 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1524 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1526 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1529 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1531 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
1534 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1536 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
1539 RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs)
1541 return lhs = lhs + rhs;
1544 RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs)
1546 return lhs = lhs - rhs;
1549 RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs)
1551 return lhs = lhs * rhs;
1554 RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs)
1556 return lhs = lhs / rhs;
1559 RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs)
1561 return lhs = lhs % rhs;
1564 RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs)
1566 return lhs = lhs & rhs;
1569 RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs)
1571 return lhs = lhs | rhs;
1574 RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs)
1576 return lhs = lhs ^ rhs;
1579 RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs)
1581 return lhs = lhs << rhs;
1584 RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs)
1586 return lhs = lhs >> rhs;
1589 RValue<Short> operator+(RValue<Short> val)
1594 RValue<Short> operator-(RValue<Short> val)
1596 return RValue<Short>(Nucleus::createNeg(val.value));
1599 RValue<Short> operator~(RValue<Short> val)
1601 return RValue<Short>(Nucleus::createNot(val.value));
1604 RValue<Short> operator++(const Short &val, int) // Post-increment
1606 RValue<Short> res = val;
1608 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1609 val.storeValue(inc);
1614 const Short &operator++(const Short &val) // Pre-increment
1616 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1617 val.storeValue(inc);
1622 RValue<Short> operator--(const Short &val, int) // Post-decrement
1624 RValue<Short> res = val;
1626 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1627 val.storeValue(inc);
1632 const Short &operator--(const Short &val) // Pre-decrement
1634 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1635 val.storeValue(inc);
1640 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1642 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1645 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1647 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1650 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1652 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1655 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1657 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1660 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1662 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1665 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1667 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1670 Type *Short::getType()
1672 return T(llvm::Type::getInt16Ty(*::context));
1675 UShort::UShort(Argument<UShort> argument)
1677 storeValue(argument.value);
1680 UShort::UShort(RValue<UInt> cast)
1682 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1684 storeValue(integer);
1687 UShort::UShort(RValue<Int> cast)
1689 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1691 storeValue(integer);
1698 UShort::UShort(unsigned short x)
1700 storeValue(Nucleus::createConstantShort(x));
1703 UShort::UShort(RValue<UShort> rhs)
1705 storeValue(rhs.value);
1708 UShort::UShort(const UShort &rhs)
1710 Value *value = rhs.loadValue();
1714 UShort::UShort(const Reference<UShort> &rhs)
1716 Value *value = rhs.loadValue();
1720 RValue<UShort> UShort::operator=(RValue<UShort> rhs) const
1722 storeValue(rhs.value);
1727 RValue<UShort> UShort::operator=(const UShort &rhs) const
1729 Value *value = rhs.loadValue();
1732 return RValue<UShort>(value);
1735 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const
1737 Value *value = rhs.loadValue();
1740 return RValue<UShort>(value);
1743 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1745 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1748 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1750 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1753 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1755 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
1758 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1760 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1763 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1765 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1768 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1770 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1773 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1775 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1778 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1780 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1783 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1785 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
1788 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1790 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
1793 RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs)
1795 return lhs = lhs + rhs;
1798 RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs)
1800 return lhs = lhs - rhs;
1803 RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs)
1805 return lhs = lhs * rhs;
1808 RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs)
1810 return lhs = lhs / rhs;
1813 RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs)
1815 return lhs = lhs % rhs;
1818 RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs)
1820 return lhs = lhs & rhs;
1823 RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs)
1825 return lhs = lhs | rhs;
1828 RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs)
1830 return lhs = lhs ^ rhs;
1833 RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs)
1835 return lhs = lhs << rhs;
1838 RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs)
1840 return lhs = lhs >> rhs;
1843 RValue<UShort> operator+(RValue<UShort> val)
1848 RValue<UShort> operator-(RValue<UShort> val)
1850 return RValue<UShort>(Nucleus::createNeg(val.value));
1853 RValue<UShort> operator~(RValue<UShort> val)
1855 return RValue<UShort>(Nucleus::createNot(val.value));
1858 RValue<UShort> operator++(const UShort &val, int) // Post-increment
1860 RValue<UShort> res = val;
1862 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1863 val.storeValue(inc);
1868 const UShort &operator++(const UShort &val) // Pre-increment
1870 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1871 val.storeValue(inc);
1876 RValue<UShort> operator--(const UShort &val, int) // Post-decrement
1878 RValue<UShort> res = val;
1880 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1881 val.storeValue(inc);
1886 const UShort &operator--(const UShort &val) // Pre-decrement
1888 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1889 val.storeValue(inc);
1894 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1896 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1899 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1901 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1904 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1906 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1909 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1911 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1914 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1916 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1919 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1921 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
1924 Type *UShort::getType()
1926 return T(llvm::Type::getInt16Ty(*::context));
// Returns the type used to represent a Byte4.
// NOTE(review): two return statements appear back to back, so as written only the
// first (a 4-element vector of Byte) is reachable. Presumably they are alternatives
// selected by a preprocessor conditional that is not visible here, with the UInt
// variant being the workaround the FIXME refers to - confirm against the full file.
1929 Type *Byte4::getType()
1932 return T(VectorType::get(Byte::getType(), 4));
1934 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
// Returns the type used to represent an SByte4.
// NOTE(review): two return statements appear back to back, so as written only the
// first (a 4-element vector of SByte) is reachable. Presumably they are alternatives
// selected by a preprocessor conditional that is not visible here, with the Int
// variant being the workaround the FIXME refers to - confirm against the full file.
1938 Type *SByte4::getType()
1941 return T(VectorType::get(SByte::getType(), 4));
1943 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1949 // xyzw.parent = this;
1952 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
1954 // xyzw.parent = this;
1956 Constant *constantVector[8];
1957 constantVector[0] = Nucleus::createConstantByte(x0);
1958 constantVector[1] = Nucleus::createConstantByte(x1);
1959 constantVector[2] = Nucleus::createConstantByte(x2);
1960 constantVector[3] = Nucleus::createConstantByte(x3);
1961 constantVector[4] = Nucleus::createConstantByte(x4);
1962 constantVector[5] = Nucleus::createConstantByte(x5);
1963 constantVector[6] = Nucleus::createConstantByte(x6);
1964 constantVector[7] = Nucleus::createConstantByte(x7);
1965 Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
1967 storeValue(Nucleus::createBitCast(vector, getType()));
1970 Byte8::Byte8(int64_t x)
1972 // xyzw.parent = this;
1974 Constant *constantVector[8];
1975 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0));
1976 constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >> 8));
1977 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
1978 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
1979 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
1980 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
1981 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
1982 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
1983 Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
1985 storeValue(Nucleus::createBitCast(vector, getType()));
1988 Byte8::Byte8(RValue<Byte8> rhs)
1990 // xyzw.parent = this;
1992 storeValue(rhs.value);
1995 Byte8::Byte8(const Byte8 &rhs)
1997 // xyzw.parent = this;
1999 Value *value = rhs.loadValue();
2003 Byte8::Byte8(const Reference<Byte8> &rhs)
2005 // xyzw.parent = this;
2007 Value *value = rhs.loadValue();
2011 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const
2013 storeValue(rhs.value);
2018 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
2020 Value *value = rhs.loadValue();
2023 return RValue<Byte8>(value);
2026 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const
2028 Value *value = rhs.loadValue();
2031 return RValue<Byte8>(value);
2034 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2036 if(CPUID::supportsMMX2())
2038 return x86::paddb(lhs, rhs);
2042 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
2046 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2048 if(CPUID::supportsMMX2())
2050 return x86::psubb(lhs, rhs);
2054 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2058 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2060 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2063 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2065 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2068 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2070 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
2073 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2075 if(CPUID::supportsMMX2())
2077 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2081 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2085 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2087 if(CPUID::supportsMMX2())
2089 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2093 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2097 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2099 if(CPUID::supportsMMX2())
2101 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2105 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2109 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2111 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2114 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2116 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
2119 RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs)
2121 return lhs = lhs + rhs;
2124 RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs)
2126 return lhs = lhs - rhs;
2129 // RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs)
2131 // return lhs = lhs * rhs;
2134 // RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs)
2136 // return lhs = lhs / rhs;
2139 // RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs)
2141 // return lhs = lhs % rhs;
2144 RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs)
2146 return lhs = lhs & rhs;
2149 RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs)
2151 return lhs = lhs | rhs;
2154 RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs)
2156 return lhs = lhs ^ rhs;
2159 // RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs)
2161 // return lhs = lhs << rhs;
2164 // RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs)
2166 // return lhs = lhs >> rhs;
2169 // RValue<Byte8> operator+(RValue<Byte8> val)
2174 // RValue<Byte8> operator-(RValue<Byte8> val)
2176 // return RValue<Byte8>(Nucleus::createNeg(val.value));
2179 RValue<Byte8> operator~(RValue<Byte8> val)
2181 if(CPUID::supportsMMX2())
2183 return val ^ Byte8(0xFFFFFFFFFFFFFFFF);
2187 return RValue<Byte8>(Nucleus::createNot(val.value));
2191 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2193 return x86::paddusb(x, y);
2196 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2198 return x86::psubusb(x, y);
2201 RValue<Short4> Unpack(RValue<Byte4> x)
2203 Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
2204 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2206 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
2209 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2211 if(CPUID::supportsMMX2())
2213 return x86::punpcklbw(x, y);
2217 Constant *shuffle[8];
2218 shuffle[0] = Nucleus::createConstantInt(0);
2219 shuffle[1] = Nucleus::createConstantInt(8);
2220 shuffle[2] = Nucleus::createConstantInt(1);
2221 shuffle[3] = Nucleus::createConstantInt(9);
2222 shuffle[4] = Nucleus::createConstantInt(2);
2223 shuffle[5] = Nucleus::createConstantInt(10);
2224 shuffle[6] = Nucleus::createConstantInt(3);
2225 shuffle[7] = Nucleus::createConstantInt(11);
2227 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 8)));
2229 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2233 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2235 if(CPUID::supportsMMX2())
2237 return x86::punpckhbw(x, y);
2241 Constant *shuffle[8];
2242 shuffle[0] = Nucleus::createConstantInt(4);
2243 shuffle[1] = Nucleus::createConstantInt(12);
2244 shuffle[2] = Nucleus::createConstantInt(5);
2245 shuffle[3] = Nucleus::createConstantInt(13);
2246 shuffle[4] = Nucleus::createConstantInt(6);
2247 shuffle[5] = Nucleus::createConstantInt(14);
2248 shuffle[6] = Nucleus::createConstantInt(7);
2249 shuffle[7] = Nucleus::createConstantInt(15);
2251 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 8)));
2253 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2257 RValue<Int> SignMask(RValue<Byte8> x)
2259 return x86::pmovmskb(x);
2262 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2264 // return x86::pcmpgtb(x, y); // FIXME: Signedness
2267 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2269 return x86::pcmpeqb(x, y);
2272 Type *Byte8::getType()
2274 if(CPUID::supportsMMX2())
2276 return MMX::getType();
2280 return T(VectorType::get(Byte::getType(), 8));
2286 // xyzw.parent = this;
2289 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2291 // xyzw.parent = this;
2293 Constant *constantVector[8];
2294 constantVector[0] = Nucleus::createConstantByte(x0);
2295 constantVector[1] = Nucleus::createConstantByte(x1);
2296 constantVector[2] = Nucleus::createConstantByte(x2);
2297 constantVector[3] = Nucleus::createConstantByte(x3);
2298 constantVector[4] = Nucleus::createConstantByte(x4);
2299 constantVector[5] = Nucleus::createConstantByte(x5);
2300 constantVector[6] = Nucleus::createConstantByte(x6);
2301 constantVector[7] = Nucleus::createConstantByte(x7);
2302 Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
2304 storeValue(Nucleus::createBitCast(vector, getType()));
2307 SByte8::SByte8(int64_t x)
2309 // xyzw.parent = this;
2311 Constant *constantVector[8];
2312 constantVector[0] = Nucleus::createConstantByte((unsigned char)(x >> 0));
2313 constantVector[1] = Nucleus::createConstantByte((unsigned char)(x >> 8));
2314 constantVector[2] = Nucleus::createConstantByte((unsigned char)(x >> 16));
2315 constantVector[3] = Nucleus::createConstantByte((unsigned char)(x >> 24));
2316 constantVector[4] = Nucleus::createConstantByte((unsigned char)(x >> 32));
2317 constantVector[5] = Nucleus::createConstantByte((unsigned char)(x >> 40));
2318 constantVector[6] = Nucleus::createConstantByte((unsigned char)(x >> 48));
2319 constantVector[7] = Nucleus::createConstantByte((unsigned char)(x >> 56));
2320 Value *vector = V(Nucleus::createConstantVector(constantVector, 8));
2322 storeValue(Nucleus::createBitCast(vector, getType()));
2325 SByte8::SByte8(RValue<SByte8> rhs)
2327 // xyzw.parent = this;
2329 storeValue(rhs.value);
2332 SByte8::SByte8(const SByte8 &rhs)
2334 // xyzw.parent = this;
2336 Value *value = rhs.loadValue();
2340 SByte8::SByte8(const Reference<SByte8> &rhs)
2342 // xyzw.parent = this;
2344 Value *value = rhs.loadValue();
2348 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const
2350 storeValue(rhs.value);
2355 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
2357 Value *value = rhs.loadValue();
2360 return RValue<SByte8>(value);
2363 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const
2365 Value *value = rhs.loadValue();
2368 return RValue<SByte8>(value);
2371 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2373 if(CPUID::supportsMMX2())
2375 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2379 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2383 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2385 if(CPUID::supportsMMX2())
2387 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2391 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2395 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2397 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2400 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2402 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2405 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2407 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
2410 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2412 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2415 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2417 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2420 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2422 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2425 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2427 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2430 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2432 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
2435 RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs)
2437 return lhs = lhs + rhs;
2440 RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs)
2442 return lhs = lhs - rhs;
2445 // RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs)
2447 // return lhs = lhs * rhs;
2450 // RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs)
2452 // return lhs = lhs / rhs;
2455 // RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs)
2457 // return lhs = lhs % rhs;
2460 RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs)
2462 return lhs = lhs & rhs;
2465 RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs)
2467 return lhs = lhs | rhs;
2470 RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs)
2472 return lhs = lhs ^ rhs;
2475 // RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs)
2477 // return lhs = lhs << rhs;
2480 // RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs)
2482 // return lhs = lhs >> rhs;
2485 // RValue<SByte8> operator+(RValue<SByte8> val)
2490 // RValue<SByte8> operator-(RValue<SByte8> val)
2492 // return RValue<SByte8>(Nucleus::createNeg(val.value));
2495 RValue<SByte8> operator~(RValue<SByte8> val)
2497 if(CPUID::supportsMMX2())
2499 return val ^ SByte8(0xFFFFFFFFFFFFFFFF);
2503 return RValue<SByte8>(Nucleus::createNot(val.value));
2507 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2509 return x86::paddsb(x, y);
2512 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2514 return x86::psubsb(x, y);
2517 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2519 if(CPUID::supportsMMX2())
2521 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2525 Constant *shuffle[8];
2526 shuffle[0] = Nucleus::createConstantInt(0);
2527 shuffle[1] = Nucleus::createConstantInt(8);
2528 shuffle[2] = Nucleus::createConstantInt(1);
2529 shuffle[3] = Nucleus::createConstantInt(9);
2530 shuffle[4] = Nucleus::createConstantInt(2);
2531 shuffle[5] = Nucleus::createConstantInt(10);
2532 shuffle[6] = Nucleus::createConstantInt(3);
2533 shuffle[7] = Nucleus::createConstantInt(11);
2535 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 8)));
2537 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2541 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2543 if(CPUID::supportsMMX2())
2545 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2549 Constant *shuffle[8];
2550 shuffle[0] = Nucleus::createConstantInt(4);
2551 shuffle[1] = Nucleus::createConstantInt(12);
2552 shuffle[2] = Nucleus::createConstantInt(5);
2553 shuffle[3] = Nucleus::createConstantInt(13);
2554 shuffle[4] = Nucleus::createConstantInt(6);
2555 shuffle[5] = Nucleus::createConstantInt(14);
2556 shuffle[6] = Nucleus::createConstantInt(7);
2557 shuffle[7] = Nucleus::createConstantInt(15);
2559 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 8)));
2561 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2565 RValue<Int> SignMask(RValue<SByte8> x)
2567 return x86::pmovmskb(As<Byte8>(x));
2570 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2572 return x86::pcmpgtb(x, y);
2575 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2577 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
2580 Type *SByte8::getType()
2582 if(CPUID::supportsMMX2())
2584 return MMX::getType();
2588 return T(VectorType::get(SByte::getType(), 8));
2592 Byte16::Byte16(RValue<Byte16> rhs)
2594 // xyzw.parent = this;
2596 storeValue(rhs.value);
2599 Byte16::Byte16(const Byte16 &rhs)
2601 // xyzw.parent = this;
2603 Value *value = rhs.loadValue();
2607 Byte16::Byte16(const Reference<Byte16> &rhs)
2609 // xyzw.parent = this;
2611 Value *value = rhs.loadValue();
2615 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const
2617 storeValue(rhs.value);
2622 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
2624 Value *value = rhs.loadValue();
2627 return RValue<Byte16>(value);
2630 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const
2632 Value *value = rhs.loadValue();
2635 return RValue<Byte16>(value);
2638 Type *Byte16::getType()
2640 return T(VectorType::get(Byte::getType(), 16));
2643 Type *SByte16::getType()
2645 return T( VectorType::get(SByte::getType(), 16));
2648 Short4::Short4(RValue<Int> cast)
2650 Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2651 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2653 storeValue(swizzle);
// Builds a Short4 from an Int4. Judging by the shuffle masks below, the even-numbered
// 16-bit elements of the Short8 view (equivalently bytes 0,1 of each 4-byte element)
// are gathered together, i.e. one 16-bit half of each 32-bit element is kept.
// NOTE(review): this constructor mixes preprocessor alternatives ('#if 0' sections) with
// an SSSE3 / non-SSSE3 runtime split; several declarations and the closing directives
// are not visible here, so confirm the exact control flow against the full file.
2656 Short4::Short4(RValue<Int4> cast)
// View the four 32-bit elements as eight 16-bit elements.
2658 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
// Disabled alternative: a single shuffle selecting 16-bit elements 0, 2, 4, 6.
2660 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
2662 pack[0] = Nucleus::createConstantInt(0);
2663 pack[1] = Nucleus::createConstantInt(2);
2664 pack[2] = Nucleus::createConstantInt(4);
2665 pack[3] = Nucleus::createConstantInt(6);
2667 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
2671 // FIXME: Use Swizzle<Short8>
// Without SSSE3: two 16-bit shuffles followed by a 32-bit swizzle.
2672 if(!CPUID::supportsSSSE3())
// First mask: reorders elements 0..3 to 0,2,0,2 and leaves elements 4..7 in place.
2674 Constant *pshuflw[8];
2675 pshuflw[0] = Nucleus::createConstantInt(0);
2676 pshuflw[1] = Nucleus::createConstantInt(2);
2677 pshuflw[2] = Nucleus::createConstantInt(0);
2678 pshuflw[3] = Nucleus::createConstantInt(2);
2679 pshuflw[4] = Nucleus::createConstantInt(4);
2680 pshuflw[5] = Nucleus::createConstantInt(5);
2681 pshuflw[6] = Nucleus::createConstantInt(6);
2682 pshuflw[7] = Nucleus::createConstantInt(7);
// Second mask: leaves elements 0..3 in place and reorders elements 4..7 to 4,6,4,6.
2684 Constant *pshufhw[8];
2685 pshufhw[0] = Nucleus::createConstantInt(0);
2686 pshufhw[1] = Nucleus::createConstantInt(1);
2687 pshufhw[2] = Nucleus::createConstantInt(2);
2688 pshufhw[3] = Nucleus::createConstantInt(3);
2689 pshufhw[4] = Nucleus::createConstantInt(4);
2690 pshufhw[5] = Nucleus::createConstantInt(6);
2691 pshufhw[6] = Nucleus::createConstantInt(4);
2692 pshufhw[7] = Nucleus::createConstantInt(6);
// Apply both masks in sequence, then swizzle the result as an Int4 with selector 0x88.
2694 Value *shuffle1 = Nucleus::createShuffleVector(short8, V(UndefValue::get(Short8::getType())), V(Nucleus::createConstantVector(pshuflw, 8)));
2695 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, V(UndefValue::get(Short8::getType())), V(Nucleus::createConstantVector(pshufhw, 8)));
2696 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
2697 packed = Nucleus::createSwizzle(int4, 0x88);
// With SSSE3 (presumably the else branch): a single byte shuffle picking bytes 0,1 of
// each 4-byte element, with the selection repeated in the upper eight bytes.
2701 Constant *pshufb[16];
2702 pshufb[0] = Nucleus::createConstantInt(0);
2703 pshufb[1] = Nucleus::createConstantInt(1);
2704 pshufb[2] = Nucleus::createConstantInt(4);
2705 pshufb[3] = Nucleus::createConstantInt(5);
2706 pshufb[4] = Nucleus::createConstantInt(8);
2707 pshufb[5] = Nucleus::createConstantInt(9);
2708 pshufb[6] = Nucleus::createConstantInt(12);
2709 pshufb[7] = Nucleus::createConstantInt(13);
2710 pshufb[8] = Nucleus::createConstantInt(0);
2711 pshufb[9] = Nucleus::createConstantInt(1);
2712 pshufb[10] = Nucleus::createConstantInt(4);
2713 pshufb[11] = Nucleus::createConstantInt(5);
2714 pshufb[12] = Nucleus::createConstantInt(8);
2715 pshufb[13] = Nucleus::createConstantInt(9);
2716 pshufb[14] = Nucleus::createConstantInt(12);
2717 pshufb[15] = Nucleus::createConstantInt(13);
2719 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2720 packed = Nucleus::createShuffleVector(byte16, V(UndefValue::get(Byte16::getType())), V(Nucleus::createConstantVector(pshufb, 16)));
// Reduce the 128-bit 'packed' value to a Short4. The '#if 0' variant extracts element 0
// of a two-element Long view; the other variant converts through Int4 -> Int2.
2723 #if 0 // FIXME: No optimal instruction selection
2724 Value *qword2 = Nucleus::createBitCast(packed, Long2::getType());
2725 Value *element = Nucleus::createExtractElement(qword2, 0);
2726 Value *short4 = Nucleus::createBitCast(element, Short4::getType());
2727 #else // FIXME: Requires SSE
2728 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2729 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2736 // Short4::Short4(RValue<Float> cast)
2740 Short4::Short4(RValue<Float4> cast)
2742 Int4 v4i32 = Int4(cast);
2743 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2745 storeValue(As<Short4>(Int2(v4i32)).value);
2750 // xyzw.parent = this;
2753 Short4::Short4(short xyzw)
2755 // xyzw.parent = this;
2757 Constant *constantVector[4];
2758 constantVector[0] = Nucleus::createConstantShort(xyzw);
2759 constantVector[1] = Nucleus::createConstantShort(xyzw);
2760 constantVector[2] = Nucleus::createConstantShort(xyzw);
2761 constantVector[3] = Nucleus::createConstantShort(xyzw);
2762 Value *vector = V(Nucleus::createConstantVector(constantVector, 4));
2764 storeValue(Nucleus::createBitCast(vector, getType()));
2767 Short4::Short4(short x, short y, short z, short w)
2769 // xyzw.parent = this;
2771 Constant *constantVector[4];
2772 constantVector[0] = Nucleus::createConstantShort(x);
2773 constantVector[1] = Nucleus::createConstantShort(y);
2774 constantVector[2] = Nucleus::createConstantShort(z);
2775 constantVector[3] = Nucleus::createConstantShort(w);
2776 Value *vector = V(Nucleus::createConstantVector(constantVector, 4));
2778 storeValue(Nucleus::createBitCast(vector, getType()));
2781 Short4::Short4(RValue<Short4> rhs)
2783 // xyzw.parent = this;
2785 storeValue(rhs.value);
2788 Short4::Short4(const Short4 &rhs)
2790 // xyzw.parent = this;
2792 Value *value = rhs.loadValue();
2796 Short4::Short4(const Reference<Short4> &rhs)
2798 // xyzw.parent = this;
2800 Value *value = rhs.loadValue();
2804 Short4::Short4(RValue<UShort4> rhs)
2806 // xyzw.parent = this;
2808 storeValue(rhs.value);
2811 Short4::Short4(const UShort4 &rhs)
2813 // xyzw.parent = this;
2815 storeValue(rhs.loadValue());
2818 Short4::Short4(const Reference<UShort4> &rhs)
2820 // xyzw.parent = this;
2822 storeValue(rhs.loadValue());
// Assignment operators for Short4. They are const member functions that write
// through storeValue() and hand the assigned value back as an RValue<Short4>.
// NOTE(review): this excerpt omits short lines; where a store or return
// statement is not visible it was presumably dropped -- confirm against the
// full file.

// Assign from an r-value (the return statement is not visible here).
2825 RValue<Short4> Short4::operator=(RValue<Short4> rhs) const
2827 storeValue(rhs.value);
// Assign from another Short4 variable: loads it and returns the loaded value
// (the store is not visible here).
2832 RValue<Short4> Short4::operator=(const Short4 &rhs) const
2834 Value *value = rhs.loadValue();
2837 return RValue<Short4>(value);
// Assign from a Reference<Short4> (the store is not visible here).
2840 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const
2842 Value *value = rhs.loadValue();
2845 return RValue<Short4>(value);
// Assign from an unsigned r-value: stores the value and re-wraps rhs as Short4.
2848 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const
2850 storeValue(rhs.value);
2852 return RValue<Short4>(rhs);
// Assign from a UShort4 variable (the store is not visible here).
2855 RValue<Short4> Short4::operator=(const UShort4 &rhs) const
2857 Value *value = rhs.loadValue();
2860 return RValue<Short4>(value);
// Assign from a Reference<UShort4> (the store is not visible here).
2863 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const
2865 Value *value = rhs.loadValue();
2868 return RValue<Short4>(value);
2871 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2873 if(CPUID::supportsMMX2())
2875 return x86::paddw(lhs, rhs);
2879 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
2883 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2885 if(CPUID::supportsMMX2())
2887 return x86::psubw(lhs, rhs);
2891 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
2895 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2897 if(CPUID::supportsMMX2())
2899 return x86::pmullw(lhs, rhs);
2903 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2907 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2909 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2912 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2914 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
2917 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2919 if(CPUID::supportsMMX2())
2921 return x86::pand(lhs, rhs);
2925 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
2929 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2931 if(CPUID::supportsMMX2())
2933 return x86::por(lhs, rhs);
2937 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
2941 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2943 if(CPUID::supportsMMX2())
2945 return x86::pxor(lhs, rhs);
2949 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
2953 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2955 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2957 return x86::psllw(lhs, rhs);
2960 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2962 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2964 return x86::psraw(lhs, rhs);
2967 RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs)
2969 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2971 return x86::psllw(lhs, rhs);
2974 RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs)
2976 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2978 return x86::psraw(lhs, rhs);
2981 RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs)
2983 return lhs = lhs + rhs;
2986 RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs)
2988 return lhs = lhs - rhs;
2991 RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs)
2993 return lhs = lhs * rhs;
2996 // RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs)
2998 // return lhs = lhs / rhs;
3001 // RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs)
3003 // return lhs = lhs % rhs;
3006 RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs)
3008 return lhs = lhs & rhs;
3011 RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs)
3013 return lhs = lhs | rhs;
3016 RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs)
3018 return lhs = lhs ^ rhs;
3021 RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
3023 return lhs = lhs << rhs;
3026 RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
3028 return lhs = lhs >> rhs;
3031 RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs)
3033 return lhs = lhs << rhs;
3036 RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs)
3038 return lhs = lhs >> rhs;
3041 // RValue<Short4> operator+(RValue<Short4> val)
3046 RValue<Short4> operator-(RValue<Short4> val)
3048 if(CPUID::supportsMMX2())
3050 return Short4(0, 0, 0, 0) - val;
3054 return RValue<Short4>(Nucleus::createNeg(val.value));
3058 RValue<Short4> operator~(RValue<Short4> val)
3060 if(CPUID::supportsMMX2())
3062 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
3066 return RValue<Short4>(Nucleus::createNot(val.value));
3070 RValue<Short4> RoundShort4(RValue<Float4> cast)
3072 RValue<Int4> v4i32 = x86::cvtps2dq(cast);
3073 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
3075 return As<Short4>(Int2(As<Int4>(v8i16)));
3078 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
3080 return x86::pmaxsw(x, y);
3083 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3085 return x86::pminsw(x, y);
3088 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3090 return x86::paddsw(x, y);
3093 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3095 return x86::psubsw(x, y);
3098 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3100 return x86::pmulhw(x, y);
3103 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3105 return x86::pmaddwd(x, y);
3108 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3110 return x86::packsswb(x, y);
3113 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3115 if(CPUID::supportsMMX2())
3117 return x86::punpcklwd(x, y);
3121 Constant *shuffle[4];
3122 shuffle[0] = Nucleus::createConstantInt(0);
3123 shuffle[1] = Nucleus::createConstantInt(4);
3124 shuffle[2] = Nucleus::createConstantInt(1);
3125 shuffle[3] = Nucleus::createConstantInt(5);
3127 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 4)));
3129 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3133 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3135 if(CPUID::supportsMMX2())
3137 return x86::punpckhwd(x, y);
3141 Constant *shuffle[4];
3142 shuffle[0] = Nucleus::createConstantInt(2);
3143 shuffle[1] = Nucleus::createConstantInt(6);
3144 shuffle[2] = Nucleus::createConstantInt(3);
3145 shuffle[3] = Nucleus::createConstantInt(7);
3147 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 4)));
3149 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
3153 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3155 if(CPUID::supportsMMX2())
3157 return x86::pshufw(x, select);
3161 return RValue<Short4>(Nucleus::createSwizzle(x.value, select));
3165 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3167 if(CPUID::supportsMMX2())
3169 return x86::pinsrw(val, Int(element), i);
3173 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
3177 RValue<Short> Extract(RValue<Short4> val, int i)
3179 if(CPUID::supportsMMX2())
3181 return Short(x86::pextrw(val, i));
3185 return RValue<Short>(Nucleus::createExtractElement(val.value, i));
3189 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3191 return x86::pcmpgtw(x, y);
3194 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3196 return x86::pcmpeqw(x, y);
3199 Type *Short4::getType()
3201 if(CPUID::supportsMMX2())
3203 return MMX::getType();
3207 return T(VectorType::get(Short::getType(), 4));
// Converting constructors for UShort4.
// NOTE(review): this excerpt omits short lines. In the Float4 constructor the
// declarations of `sat` and `int4`, the test on `saturate` that presumably
// guards the first if/else, and the non-saturating assignment to `sat` are
// not visible -- confirm against the full file before editing.

// From an Int4: goes through the Short4(Int4) conversion and assigns the result.
3211 UShort4::UShort4(RValue<Int4> cast)
3213 *this = Short4(cast);
// From a Float4, optionally clamping the input first.
3216 UShort4::UShort4(RValue<Float4> cast, bool saturate)
// With SSE4.1 only the upper bound (0xFFFF) is applied here; per the original
// comment the lower bound is left to packusdw. Without SSE4.1 both bounds are
// applied with Min/Max.
3222 if(CPUID::supportsSSE4_1())
3224 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
3228 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
// The Short4(Int4) conversion is used when not saturating or when SSE4.1 is
// unavailable; otherwise x86::packusdw packs int4 with itself and the Int2
// view of the result is reinterpreted as Short4.
3238 if(!saturate || !CPUID::supportsSSE4_1())
3240 *this = Short4(Int4(int4));
3244 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
3250 // xyzw.parent = this;
3253 UShort4::UShort4(unsigned short xyzw)
3255 // xyzw.parent = this;
3257 Constant *constantVector[4];
3258 constantVector[0] = Nucleus::createConstantShort(xyzw);
3259 constantVector[1] = Nucleus::createConstantShort(xyzw);
3260 constantVector[2] = Nucleus::createConstantShort(xyzw);
3261 constantVector[3] = Nucleus::createConstantShort(xyzw);
3262 Value *vector = V(Nucleus::createConstantVector(constantVector, 4));
3264 storeValue(Nucleus::createBitCast(vector, getType()));
3267 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3269 // xyzw.parent = this;
3271 Constant *constantVector[4];
3272 constantVector[0] = Nucleus::createConstantShort(x);
3273 constantVector[1] = Nucleus::createConstantShort(y);
3274 constantVector[2] = Nucleus::createConstantShort(z);
3275 constantVector[3] = Nucleus::createConstantShort(w);
3276 Value *vector = V(Nucleus::createConstantVector(constantVector, 4));
3278 storeValue(Nucleus::createBitCast(vector, getType()));
// Copy and conversion constructors for UShort4. Each one initializes this
// variable from an existing UShort4/Short4 given as an RValue, a variable, or
// a Reference.
// NOTE(review): this excerpt omits short lines (braces and some statements);
// where only the load is visible, the store of `value` is presumably the
// dropped line -- confirm against the full file.

// From an r-value: stores the wrapped value directly.
3281 UShort4::UShort4(RValue<UShort4> rhs)
3283 // xyzw.parent = this;
3285 storeValue(rhs.value);
// From another UShort4 variable: loads its current value (store not visible here).
3288 UShort4::UShort4(const UShort4 &rhs)
3290 // xyzw.parent = this;
3292 Value *value = rhs.loadValue();
// From a Reference<UShort4>: loads the referenced value (store not visible here).
3296 UShort4::UShort4(const Reference<UShort4> &rhs)
3298 // xyzw.parent = this;
3300 Value *value = rhs.loadValue();
// From a signed r-value: the value is stored as-is, without any conversion call.
3304 UShort4::UShort4(RValue<Short4> rhs)
3306 // xyzw.parent = this;
3308 storeValue(rhs.value);
// From a Short4 variable: loads its current value (store not visible here).
3311 UShort4::UShort4(const Short4 &rhs)
3313 // xyzw.parent = this;
3315 Value *value = rhs.loadValue();
// From a Reference<Short4>: loads the referenced value (store not visible here).
3319 UShort4::UShort4(const Reference<Short4> &rhs)
3321 // xyzw.parent = this;
3323 Value *value = rhs.loadValue();
// Assignment operators for UShort4. They are const member functions that write
// through storeValue() and hand the assigned value back as an RValue<UShort4>.
// NOTE(review): this excerpt omits short lines; where a store or return
// statement is not visible it was presumably dropped -- confirm against the
// full file.

// Assign from an r-value (the return statement is not visible here).
3327 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const
3329 storeValue(rhs.value);
// Assign from another UShort4 variable (the store is not visible here).
3334 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
3336 Value *value = rhs.loadValue();
3339 return RValue<UShort4>(value);
// Assign from a Reference<UShort4> (the store is not visible here).
3342 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const
3344 Value *value = rhs.loadValue();
3347 return RValue<UShort4>(value);
// Assign from a signed r-value: stores the value and re-wraps rhs as UShort4.
3350 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const
3352 storeValue(rhs.value);
3354 return RValue<UShort4>(rhs);
// Assign from a Short4 variable (the store is not visible here).
3357 RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
3359 Value *value = rhs.loadValue();
3362 return RValue<UShort4>(value);
// Assign from a Reference<Short4> (the store is not visible here).
3365 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const
3367 Value *value = rhs.loadValue();
3370 return RValue<UShort4>(value);
3373 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3375 if(CPUID::supportsMMX2())
3377 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3381 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
3385 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3387 if(CPUID::supportsMMX2())
3389 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3393 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
3397 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3399 if(CPUID::supportsMMX2())
3401 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3405 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
3409 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3411 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3413 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3416 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3418 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3420 return x86::psrlw(lhs, rhs);
3423 RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs)
3425 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3427 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
3430 RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs)
3432 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3434 return x86::psrlw(lhs, rhs);
3437 RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
3439 return lhs = lhs << rhs;
3442 RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
3444 return lhs = lhs >> rhs;
3447 RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs)
3449 return lhs = lhs << rhs;
3452 RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs)
3454 return lhs = lhs >> rhs;
3457 RValue<UShort4> operator~(RValue<UShort4> val)
3459 if(CPUID::supportsMMX2())
3461 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3465 return RValue<UShort4>(Nucleus::createNot(val.value));
// Unsigned Max built on the signed Short4 Max: 0x8000 is subtracted from every
// lane of both operands (reinterpreted as Short4), the signed Max is taken,
// and 0x8000 is added back to the result. Biasing by 0x8000 maps unsigned
// 16-bit ordering onto signed ordering.
3469 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3471 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
// Unsigned Min, using the same 0x8000 bias around the signed Short4 Min.
3474 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3476 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
3479 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3481 return x86::paddusw(x, y);
3484 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3486 return x86::psubusw(x, y);
3489 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3491 return x86::pmulhuw(x, y);
3494 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3496 return x86::pavgw(x, y);
3499 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3501 return x86::packuswb(x, y);
3504 Type *UShort4::getType()
3506 if(CPUID::supportsMMX2())
3508 return MMX::getType();
3512 return T(VectorType::get(UShort::getType(), 4));
3516 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3518 // xyzw.parent = this;
3520 Constant *constantVector[8];
3521 constantVector[0] = Nucleus::createConstantShort(c0);
3522 constantVector[1] = Nucleus::createConstantShort(c1);
3523 constantVector[2] = Nucleus::createConstantShort(c2);
3524 constantVector[3] = Nucleus::createConstantShort(c3);
3525 constantVector[4] = Nucleus::createConstantShort(c4);
3526 constantVector[5] = Nucleus::createConstantShort(c5);
3527 constantVector[6] = Nucleus::createConstantShort(c6);
3528 constantVector[7] = Nucleus::createConstantShort(c7);
3530 storeValue(Nucleus::createConstantVector(constantVector, 8));
// Remaining Short8 constructors.
// NOTE(review): this excerpt omits short lines (braces and some statements);
// where a computed value is never stored in the visible lines, the store is
// presumably the dropped line -- confirm against the full file.

// From an r-value: stores the wrapped value directly.
3533 Short8::Short8(RValue<Short8> rhs)
3535 // xyzw.parent = this;
3537 storeValue(rhs.value);
// From a Reference<Short8>: loads the referenced value (store not visible here).
3540 Short8::Short8(const Reference<Short8> &rhs)
3542 // xyzw.parent = this;
3544 Value *value = rhs.loadValue();
// From two Short4 halves: each half is bit-cast to Long, inserted as element
// 0 (lo) and element 1 (hi) of an initially undefined Long2, and the Long2 is
// bit-cast to Short8 (the store of `short8` is not visible here).
3548 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3550 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3551 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3553 Value *long2 = V(UndefValue::get(Long2::getType()));
3554 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3555 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3556 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
3561 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3563 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
3566 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3568 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
3571 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3573 return x86::psllw(lhs, rhs); // FIXME: Fallback required
3576 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3578 return x86::psraw(lhs, rhs); // FIXME: Fallback required
3581 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3583 return x86::pmaddwd(x, y); // FIXME: Fallback required
3586 RValue<Int4> Abs(RValue<Int4> x)
3588 if(CPUID::supportsSSSE3())
3590 return x86::pabsd(x);
3594 Int4 mask = (x >> 31);
3595 return (mask ^ x) - mask;
3599 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3601 return x86::pmulhw(x, y); // FIXME: Fallback required
3604 Type *Short8::getType()
3606 return T(VectorType::get(Short::getType(), 8));
3609 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3611 // xyzw.parent = this;
3613 Constant *constantVector[8];
3614 constantVector[0] = Nucleus::createConstantShort(c0);
3615 constantVector[1] = Nucleus::createConstantShort(c1);
3616 constantVector[2] = Nucleus::createConstantShort(c2);
3617 constantVector[3] = Nucleus::createConstantShort(c3);
3618 constantVector[4] = Nucleus::createConstantShort(c4);
3619 constantVector[5] = Nucleus::createConstantShort(c5);
3620 constantVector[6] = Nucleus::createConstantShort(c6);
3621 constantVector[7] = Nucleus::createConstantShort(c7);
3623 storeValue(Nucleus::createConstantVector(constantVector, 8));
// Copy constructors for UShort8.
// NOTE(review): this excerpt omits short lines (braces and some statements);
// where only the load is visible, the store of `value` is presumably the
// dropped line -- confirm against the full file.

// From an r-value: stores the wrapped value directly.
3626 UShort8::UShort8(RValue<UShort8> rhs)
3628 // xyzw.parent = this;
3630 storeValue(rhs.value);
// From a Reference<UShort8>: loads the referenced value (store not visible here).
3633 UShort8::UShort8(const Reference<UShort8> &rhs)
3635 // xyzw.parent = this;
3637 Value *value = rhs.loadValue();
3641 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3643 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3644 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3646 Value *long2 = V(UndefValue::get(Long2::getType()));
3647 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3648 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3649 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
// Assignment operators for UShort8. They are const member functions that write
// through storeValue() and hand the assigned value back as an RValue<UShort8>.
// NOTE(review): this excerpt omits short lines; where a store or return
// statement is not visible it was presumably dropped -- confirm against the
// full file.

// Assign from an r-value (the return statement is not visible here).
3654 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const
3656 storeValue(rhs.value);
// Assign from another UShort8 variable (the store is not visible here).
3661 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
3663 Value *value = rhs.loadValue();
3666 return RValue<UShort8>(value);
// Assign from a Reference<UShort8> (the store is not visible here).
3669 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const
3671 Value *value = rhs.loadValue();
3674 return RValue<UShort8>(value);
3677 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3679 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
3682 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3684 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required
3687 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3689 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
3692 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3694 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
3697 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3699 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
3702 RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs)
3704 return lhs = lhs + rhs;
3707 RValue<UShort8> operator~(RValue<UShort8> val)
3709 return RValue<UShort8>(Nucleus::createNot(val.value));
3712 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3714 Constant *pshufb[16];
3715 pshufb[0] = Nucleus::createConstantInt(select0 + 0);
3716 pshufb[1] = Nucleus::createConstantInt(select0 + 1);
3717 pshufb[2] = Nucleus::createConstantInt(select1 + 0);
3718 pshufb[3] = Nucleus::createConstantInt(select1 + 1);
3719 pshufb[4] = Nucleus::createConstantInt(select2 + 0);
3720 pshufb[5] = Nucleus::createConstantInt(select2 + 1);
3721 pshufb[6] = Nucleus::createConstantInt(select3 + 0);
3722 pshufb[7] = Nucleus::createConstantInt(select3 + 1);
3723 pshufb[8] = Nucleus::createConstantInt(select4 + 0);
3724 pshufb[9] = Nucleus::createConstantInt(select4 + 1);
3725 pshufb[10] = Nucleus::createConstantInt(select5 + 0);
3726 pshufb[11] = Nucleus::createConstantInt(select5 + 1);
3727 pshufb[12] = Nucleus::createConstantInt(select6 + 0);
3728 pshufb[13] = Nucleus::createConstantInt(select6 + 1);
3729 pshufb[14] = Nucleus::createConstantInt(select7 + 0);
3730 pshufb[15] = Nucleus::createConstantInt(select7 + 1);
3732 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3733 Value *shuffle = Nucleus::createShuffleVector(byte16, V(UndefValue::get(Byte16::getType())), V(Nucleus::createConstantVector(pshufb, 16)));
3734 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3736 return RValue<UShort8>(short8);
3739 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3741 return x86::pmulhuw(x, y); // FIXME: Fallback required
3744 // FIXME: Implement as Shuffle(x, y, Select(i0, ..., i16)) and Shuffle(x, y, SELECT_PACK_REPEAT(element))
3745 // RValue<UShort8> PackRepeat(RValue<Byte16> x, RValue<Byte16> y, int element)
3747 // Constant *pshufb[16];
3748 // pshufb[0] = Nucleus::createConstantInt(element + 0);
3749 // pshufb[1] = Nucleus::createConstantInt(element + 0);
3750 // pshufb[2] = Nucleus::createConstantInt(element + 4);
3751 // pshufb[3] = Nucleus::createConstantInt(element + 4);
3752 // pshufb[4] = Nucleus::createConstantInt(element + 8);
3753 // pshufb[5] = Nucleus::createConstantInt(element + 8);
3754 // pshufb[6] = Nucleus::createConstantInt(element + 12);
3755 // pshufb[7] = Nucleus::createConstantInt(element + 12);
3756 // pshufb[8] = Nucleus::createConstantInt(element + 16);
3757 // pshufb[9] = Nucleus::createConstantInt(element + 16);
3758 // pshufb[10] = Nucleus::createConstantInt(element + 20);
3759 // pshufb[11] = Nucleus::createConstantInt(element + 20);
3760 // pshufb[12] = Nucleus::createConstantInt(element + 24);
3761 // pshufb[13] = Nucleus::createConstantInt(element + 24);
3762 // pshufb[14] = Nucleus::createConstantInt(element + 28);
3763 // pshufb[15] = Nucleus::createConstantInt(element + 28);
3765 // Value *shuffle = Nucleus::createShuffleVector(x.value, y.value, Nucleus::createConstantVector(pshufb, 16));
3766 // Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3768 // return RValue<UShort8>(short8);
3771 Type *UShort8::getType()
3773 return T(VectorType::get(UShort::getType(), 8));
3776 Int::Int(Argument<Int> argument)
3778 storeValue(argument.value);
3781 Int::Int(RValue<Byte> cast)
3783 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3785 storeValue(integer);
3788 Int::Int(RValue<SByte> cast)
3790 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3792 storeValue(integer);
3795 Int::Int(RValue<Short> cast)
3797 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3799 storeValue(integer);
3802 Int::Int(RValue<UShort> cast)
3804 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3806 storeValue(integer);
3809 Int::Int(RValue<Int2> cast)
3811 *this = Extract(cast, 0);
3814 Int::Int(RValue<Long> cast)
3816 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3818 storeValue(integer);
3821 Int::Int(RValue<Float> cast)
3823 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3825 storeValue(integer);
3834 storeValue(Nucleus::createConstantInt(x));
// Copy and conversion constructors for Int from Int/UInt sources.
// NOTE(review): this excerpt omits short lines (braces and some statements);
// where only the load is visible, the store of `value` is presumably the
// dropped line -- confirm against the full file.

// From an Int r-value: stores the wrapped value directly.
3837 Int::Int(RValue<Int> rhs)
3839 storeValue(rhs.value);
// From a UInt r-value: the value is stored as-is, without any conversion call.
3842 Int::Int(RValue<UInt> rhs)
3844 storeValue(rhs.value);
// From another Int variable (store not visible here).
3847 Int::Int(const Int &rhs)
3849 Value *value = rhs.loadValue();
// From a Reference<Int> (store not visible here).
3853 Int::Int(const Reference<Int> &rhs)
3855 Value *value = rhs.loadValue();
// From a UInt variable (store not visible here).
3859 Int::Int(const UInt &rhs)
3861 Value *value = rhs.loadValue();
// From a Reference<UInt> (store not visible here).
3865 Int::Int(const Reference<UInt> &rhs)
3867 Value *value = rhs.loadValue();
// Assignment operators for Int. They are const member functions that write
// through storeValue() and hand the assigned value back as an RValue<Int>.
// NOTE(review): this excerpt omits short lines; where a store or return
// statement is not visible it was presumably dropped -- confirm against the
// full file.

// Assign a compile-time integer: stores the constant and wraps whatever
// storeValue() returns.
3871 RValue<Int> Int::operator=(int rhs) const
3873 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
// Assign from an Int r-value (the return statement is not visible here).
3876 RValue<Int> Int::operator=(RValue<Int> rhs) const
3878 storeValue(rhs.value);
// Assign from a UInt r-value: stores the value and re-wraps rhs as Int.
3883 RValue<Int> Int::operator=(RValue<UInt> rhs) const
3885 storeValue(rhs.value);
3887 return RValue<Int>(rhs);
// Assign from another Int variable (the store is not visible here).
3890 RValue<Int> Int::operator=(const Int &rhs) const
3892 Value *value = rhs.loadValue();
3895 return RValue<Int>(value);
// Assign from a Reference<Int> (the store is not visible here).
3898 RValue<Int> Int::operator=(const Reference<Int> &rhs) const
3900 Value *value = rhs.loadValue();
3903 return RValue<Int>(value);
// Assign from a UInt variable (the store is not visible here).
3906 RValue<Int> Int::operator=(const UInt &rhs) const
3908 Value *value = rhs.loadValue();
3911 return RValue<Int>(value);
// Assign from a Reference<UInt> (the store is not visible here).
3914 RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
3916 Value *value = rhs.loadValue();
3919 return RValue<Int>(value);
3922 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3924 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
3927 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3929 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
3932 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3934 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
3937 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3939 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
3942 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3944 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
3947 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3949 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
3952 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3954 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
3957 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3959 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
3962 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3964 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
3967 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3969 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
3972 RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
3974 return lhs = lhs + rhs;
3977 RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
3979 return lhs = lhs - rhs;
3982 RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
3984 return lhs = lhs * rhs;
3987 RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
3989 return lhs = lhs / rhs;
3992 RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
3994 return lhs = lhs % rhs;
3997 RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
3999 return lhs = lhs & rhs;
4002 RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
4004 return lhs = lhs | rhs;
4007 RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
4009 return lhs = lhs ^ rhs;
4012 RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
4014 return lhs = lhs << rhs;
4017 RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
4019 return lhs = lhs >> rhs;
// Unary operators and increment/decrement for Int.
// NOTE(review): this excerpt omits short lines; the body of unary plus and the
// return statements of the four increment/decrement operators are not
// visible -- confirm against the full file.

// Unary plus (body not visible in this excerpt).
4022 RValue<Int> operator+(RValue<Int> val)
// Arithmetic negation via Nucleus::createNeg.
4027 RValue<Int> operator-(RValue<Int> val)
4029 return RValue<Int>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
4032 RValue<Int> operator~(RValue<Int> val)
4034 return RValue<Int>(Nucleus::createNot(val.value));
// Captures the current value in `res`, then stores res + 1 back into val.
// Presumably `res` (the old value) is what gets returned.
4037 RValue<Int> operator++(const Int &val, int) // Post-increment
4039 RValue<Int> res = val;
4041 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
4042 val.storeValue(inc);
// Loads val, adds 1, and stores the result back into val.
4047 const Int &operator++(const Int &val) // Pre-increment
4049 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
4050 val.storeValue(inc);
// Captures the current value in `res`, then stores res - 1 back into val.
// The local is named `inc` although it holds the decremented value.
4055 RValue<Int> operator--(const Int &val, int) // Post-decrement
4057 RValue<Int> res = val;
4059 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4060 val.storeValue(inc);
// Loads val, subtracts 1, and stores the result back into val.
4065 const Int &operator--(const Int &val) // Pre-decrement
4067 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4068 val.storeValue(inc);
4073 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
4075 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
4078 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
4080 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
4083 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
4085 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
4088 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
4090 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
4093 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
4095 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4098 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
4100 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4103 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
4105 return IfThenElse(x > y, x, y);
4108 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
4110 return IfThenElse(x < y, x, y);
4113 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
4115 return Min(Max(x, min), max);
4118 RValue<Int> RoundInt(RValue<Float> cast)
4120 return x86::cvtss2si(cast);
4122 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4125 Type *Int::getType()
4127 return T(llvm::Type::getInt32Ty(*::context));
4130 Long::Long(RValue<Int> cast)
4132 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4134 storeValue(integer);
4137 Long::Long(RValue<UInt> cast)
4139 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4141 storeValue(integer);
4148 Long::Long(RValue<Long> rhs)
4150 storeValue(rhs.value);
// Assignment operators for Long. They are const member functions that write
// through storeValue() and hand the assigned value back as an RValue<Long>.
// NOTE(review): this excerpt omits short lines; where a store or return
// statement is not visible it was presumably dropped -- confirm against the
// full file.

// Assign a compile-time 64-bit integer: stores the constant and wraps whatever
// storeValue() returns.
// NOTE(review): relies on a Nucleus::createConstantInt overload that accepts
// int64_t without truncation -- confirm in Nucleus.hpp.
4153 RValue<Long> Long::operator=(int64_t rhs) const
4155 return RValue<Long>(storeValue(Nucleus::createConstantInt(rhs)));
// Assign from a Long r-value (the return statement is not visible here).
4158 RValue<Long> Long::operator=(RValue<Long> rhs) const
4160 storeValue(rhs.value);
// Assign from another Long variable (the store is not visible here).
4165 RValue<Long> Long::operator=(const Long &rhs) const
4167 Value *value = rhs.loadValue();
4170 return RValue<Long>(value);
// Assign from a Reference<Long> (the store is not visible here).
4173 RValue<Long> Long::operator=(const Reference<Long> &rhs) const
4175 Value *value = rhs.loadValue();
4178 return RValue<Long>(value);
4181 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4183 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
4186 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4188 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
4191 RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs)
4193 return lhs = lhs + rhs;
4196 RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs)
4198 return lhs = lhs - rhs;
4201 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4203 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
4206 Type *Long::getType()
4208 return T(llvm::Type::getInt64Ty(*::context));
4211 Long1::Long1(const RValue<UInt> cast)
4213 Value *undefCast = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), cast.value, 0);
4214 Value *zeroCast = Nucleus::createInsertElement(undefCast, V(Nucleus::createConstantInt(0)), 1);
4216 storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
4219 Long1::Long1(RValue<Long1> rhs)
4221 storeValue(rhs.value);
4224 Type *Long1::getType()
4226 if(CPUID::supportsMMX2())
4228 return MMX::getType();
4232 return T(VectorType::get(Long::getType(), 1));
4236 RValue<Long2> UnpackHigh(RValue<Long2> x, RValue<Long2> y)
4238 Constant *shuffle[2];
4239 shuffle[0] = Nucleus::createConstantInt(1);
4240 shuffle[1] = Nucleus::createConstantInt(3);
4242 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 2)));
4244 return RValue<Long2>(packed);
4247 Type *Long2::getType()
4249 return T(VectorType::get(Long::getType(), 2));
4252 UInt::UInt(Argument<UInt> argument)
4254 storeValue(argument.value);
4257 UInt::UInt(RValue<UShort> cast)
4259 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4261 storeValue(integer);
4264 UInt::UInt(RValue<Long> cast)
4266 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4268 storeValue(integer);
// Converts a Float to a UInt using only signed float-to-int conversions.
// The stored value is the AND of two parts:
//   - ~(As<Int>(cast) >> 31): a mask derived from the float's sign bit, which
//     per the original comment makes negative inputs store 0;
//   - an IfThenElse on cast >= 2^31 choosing between a biased conversion
//     (subtract ustart, convert, add ustart back) and the plain conversion.
// NOTE(review): the final IfThenElse operand and the closing of the
// storeValue() call are not visible in this excerpt -- confirm against the
// full file.
4271 UInt::UInt(RValue<Float> cast)
4273 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
4274 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4276 // Smallest positive value representable in UInt, but not in Int
4277 const unsigned int ustart = 0x80000000u;
4278 const float ustartf = float(ustart);
4280 // If the value is negative, store 0, otherwise store the result of the conversion
4281 storeValue((~(As<Int>(cast) >> 31) &
4282 // Check if the value can be represented as an Int
4283 IfThenElse(cast >= ustartf,
4284 // If the value is too large, subtract ustart and re-add it after conversion.
4285 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4286 // Otherwise, just convert normally
4296 storeValue(Nucleus::createConstantInt(x));
4299 UInt::UInt(unsigned int x)
4301 storeValue(Nucleus::createConstantInt(x));
4304 UInt::UInt(RValue<UInt> rhs)
4306 storeValue(rhs.value);
4309 UInt::UInt(RValue<Int> rhs)
4311 storeValue(rhs.value);
4314 UInt::UInt(const UInt &rhs)
4316 Value *value = rhs.loadValue();
4320 UInt::UInt(const Reference<UInt> &rhs)
4322 Value *value = rhs.loadValue();
4326 UInt::UInt(const Int &rhs)
4328 Value *value = rhs.loadValue();
4332 UInt::UInt(const Reference<Int> &rhs)
4334 Value *value = rhs.loadValue();
4338 RValue<UInt> UInt::operator=(unsigned int rhs) const
4340 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
4343 RValue<UInt> UInt::operator=(RValue<UInt> rhs) const
4345 storeValue(rhs.value);
4350 RValue<UInt> UInt::operator=(RValue<Int> rhs) const
4352 storeValue(rhs.value);
4354 return RValue<UInt>(rhs);
4357 RValue<UInt> UInt::operator=(const UInt &rhs) const
4359 Value *value = rhs.loadValue();
4362 return RValue<UInt>(value);
4365 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const
4367 Value *value = rhs.loadValue();
4370 return RValue<UInt>(value);
4373 RValue<UInt> UInt::operator=(const Int &rhs) const
4375 Value *value = rhs.loadValue();
4378 return RValue<UInt>(value);
4381 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const
4383 Value *value = rhs.loadValue();
4386 return RValue<UInt>(value);
4389 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4391 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
4394 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4396 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
4399 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4401 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
4404 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4406 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
4409 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4411 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
4414 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4416 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
4419 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4421 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
4424 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4426 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
4429 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4431 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
4434 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4436 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
4439 RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs)
4441 return lhs = lhs + rhs;
4444 RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs)
4446 return lhs = lhs - rhs;
4449 RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs)
4451 return lhs = lhs * rhs;
4454 RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs)
4456 return lhs = lhs / rhs;
4459 RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs)
4461 return lhs = lhs % rhs;
4464 RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs)
4466 return lhs = lhs & rhs;
4469 RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs)
4471 return lhs = lhs | rhs;
4474 RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs)
4476 return lhs = lhs ^ rhs;
4479 RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs)
4481 return lhs = lhs << rhs;
4484 RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs)
4486 return lhs = lhs >> rhs;
4489 RValue<UInt> operator+(RValue<UInt> val)
4494 RValue<UInt> operator-(RValue<UInt> val)
4496 return RValue<UInt>(Nucleus::createNeg(val.value));
4499 RValue<UInt> operator~(RValue<UInt> val)
4501 return RValue<UInt>(Nucleus::createNot(val.value));
4504 RValue<UInt> operator++(const UInt &val, int) // Post-increment
4506 RValue<UInt> res = val;
4508 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
4509 val.storeValue(inc);
4514 const UInt &operator++(const UInt &val) // Pre-increment
4516 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
4517 val.storeValue(inc);
4522 RValue<UInt> operator--(const UInt &val, int) // Post-decrement
4524 RValue<UInt> res = val;
4526 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4527 val.storeValue(inc);
4532 const UInt &operator--(const UInt &val) // Pre-decrement
4534 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4535 val.storeValue(inc);
4540 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4542 return IfThenElse(x > y, x, y);
4545 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4547 return IfThenElse(x < y, x, y);
4550 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4552 return Min(Max(x, min), max);
4555 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4557 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
4560 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4562 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
4565 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4567 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
4570 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4572 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
4575 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4577 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
4580 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4582 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4585 // RValue<UInt> RoundUInt(RValue<Float> cast)
4587 // return x86::cvtss2si(val); // FIXME: Unsigned
4589 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
4592 Type *UInt::getType()
4594 return T(llvm::Type::getInt32Ty(*::context));
4597 // Int2::Int2(RValue<Int> cast)
4599 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4600 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4602 // Constant *shuffle[2];
4603 // shuffle[0] = Nucleus::createConstantInt(0);
4604 // shuffle[1] = Nucleus::createConstantInt(0);
4606 // Value *replicate = Nucleus::createShuffleVector(vector, UndefValue::get(Int2::getType()), Nucleus::createConstantVector(shuffle, 2));
4608 // storeValue(replicate);
4611 Int2::Int2(RValue<Int4> cast)
4613 Value *long2 = Nucleus::createBitCast(cast.value, Long2::getType());
4614 Value *element = Nucleus::createExtractElement(long2, 0);
4615 Value *int2 = Nucleus::createBitCast(element, Int2::getType());
4622 // xy.parent = this;
4625 Int2::Int2(int x, int y)
4627 // xy.parent = this;
4629 Constant *constantVector[2];
4630 constantVector[0] = Nucleus::createConstantInt(x);
4631 constantVector[1] = Nucleus::createConstantInt(y);
4632 Value *vector = V(Nucleus::createConstantVector(constantVector, 2));
4634 storeValue(Nucleus::createBitCast(vector, getType()));
4637 Int2::Int2(RValue<Int2> rhs)
4639 // xy.parent = this;
4641 storeValue(rhs.value);
4644 Int2::Int2(const Int2 &rhs)
4646 // xy.parent = this;
4648 Value *value = rhs.loadValue();
4652 Int2::Int2(const Reference<Int2> &rhs)
4654 // xy.parent = this;
4656 Value *value = rhs.loadValue();
4660 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4662 if(CPUID::supportsMMX2())
4666 // punpckldq mm0, mm1
4667 storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
4671 Constant *shuffle[2];
4672 shuffle[0] = Nucleus::createConstantInt(0);
4673 shuffle[1] = Nucleus::createConstantInt(1);
4675 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), V(Nucleus::createConstantVector(shuffle, 2)));
4677 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
4681 RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
4683 storeValue(rhs.value);
4688 RValue<Int2> Int2::operator=(const Int2 &rhs) const
4690 Value *value = rhs.loadValue();
4693 return RValue<Int2>(value);
4696 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const
4698 Value *value = rhs.loadValue();
4701 return RValue<Int2>(value);
4704 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4706 if(CPUID::supportsMMX2())
4708 return x86::paddd(lhs, rhs);
4712 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
4716 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4718 if(CPUID::supportsMMX2())
4720 return x86::psubd(lhs, rhs);
4724 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4728 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4730 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4733 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4735 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4738 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4740 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
4743 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4745 if(CPUID::supportsMMX2())
4747 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4751 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4755 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4757 if(CPUID::supportsMMX2())
4759 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4763 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4767 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4769 if(CPUID::supportsMMX2())
4771 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4775 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
4779 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4781 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4783 return x86::pslld(lhs, rhs);
4786 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4788 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4790 return x86::psrad(lhs, rhs);
4793 RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
4795 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4797 return x86::pslld(lhs, rhs);
4800 RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
4802 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4804 return x86::psrad(lhs, rhs);
4807 RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
4809 return lhs = lhs + rhs;
4812 RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
4814 return lhs = lhs - rhs;
4817 // RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
4819 // return lhs = lhs * rhs;
4822 // RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
4824 // return lhs = lhs / rhs;
4827 // RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
4829 // return lhs = lhs % rhs;
4832 RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
4834 return lhs = lhs & rhs;
4837 RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
4839 return lhs = lhs | rhs;
4842 RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
4844 return lhs = lhs ^ rhs;
4847 RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
4849 return lhs = lhs << rhs;
4852 RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
4854 return lhs = lhs >> rhs;
4857 RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs)
4859 return lhs = lhs << rhs;
4862 RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs)
4864 return lhs = lhs >> rhs;
4867 // RValue<Int2> operator+(RValue<Int2> val)
4872 // RValue<Int2> operator-(RValue<Int2> val)
4874 // return RValue<Int2>(Nucleus::createNeg(val.value));
4877 RValue<Int2> operator~(RValue<Int2> val)
4879 if(CPUID::supportsMMX2())
4881 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4885 return RValue<Int2>(Nucleus::createNot(val.value));
4889 RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4891 if(CPUID::supportsMMX2())
4893 return x86::punpckldq(x, y);
4897 Constant *shuffle[2];
4898 shuffle[0] = Nucleus::createConstantInt(0);
4899 shuffle[1] = Nucleus::createConstantInt(2);
4901 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 2)));
4903 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4907 RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4909 if(CPUID::supportsMMX2())
4911 return x86::punpckhdq(x, y);
4915 Constant *shuffle[2];
4916 shuffle[0] = Nucleus::createConstantInt(1);
4917 shuffle[1] = Nucleus::createConstantInt(3);
4919 Value *packed = Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 2)));
4921 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
4925 RValue<Int> Extract(RValue<Int2> val, int i)
4927 if(false) // FIXME: LLVM does not generate optimal code
4929 return RValue<Int>(Nucleus::createExtractElement(val.value, i));
4935 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), 0));
4939 Int2 val2 = As<Int2>(UnpackHigh(val, val));
4941 return Extract(val2, 0);
4946 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4948 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType()));
4951 Type *Int2::getType()
4953 if(CPUID::supportsMMX2())
4955 return MMX::getType();
4959 return T(VectorType::get(Int::getType(), 2));
4965 // xy.parent = this;
4968 UInt2::UInt2(unsigned int x, unsigned int y)
4970 // xy.parent = this;
4972 Constant *constantVector[2];
4973 constantVector[0] = Nucleus::createConstantInt(x);
4974 constantVector[1] = Nucleus::createConstantInt(y);
4975 Value *vector = V(Nucleus::createConstantVector(constantVector, 2));
4977 storeValue(Nucleus::createBitCast(vector, getType()));
4980 UInt2::UInt2(RValue<UInt2> rhs)
4982 // xy.parent = this;
4984 storeValue(rhs.value);
4987 UInt2::UInt2(const UInt2 &rhs)
4989 // xy.parent = this;
4991 Value *value = rhs.loadValue();
4995 UInt2::UInt2(const Reference<UInt2> &rhs)
4997 // xy.parent = this;
4999 Value *value = rhs.loadValue();
5003 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
5005 storeValue(rhs.value);
5010 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
5012 Value *value = rhs.loadValue();
5015 return RValue<UInt2>(value);
5018 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
5020 Value *value = rhs.loadValue();
5023 return RValue<UInt2>(value);
5026 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
5028 if(CPUID::supportsMMX2())
5030 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
5034 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
5038 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
5040 if(CPUID::supportsMMX2())
5042 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
5046 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
5050 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
5052 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
5055 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
5057 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
5060 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
5062 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
5065 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
5067 if(CPUID::supportsMMX2())
5069 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
5073 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
5077 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
5079 if(CPUID::supportsMMX2())
5081 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
5085 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
5089 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
5091 if(CPUID::supportsMMX2())
5093 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
5097 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
5101 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
5103 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5105 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5108 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
5110 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5112 return x86::psrld(lhs, rhs);
5115 RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs)
5117 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
5119 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
5122 RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs)
5124 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
5126 return x86::psrld(lhs, rhs);
5129 RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs)
5131 return lhs = lhs + rhs;
5134 RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs)
5136 return lhs = lhs - rhs;
5139 // RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs)
5141 // return lhs = lhs * rhs;
5144 // RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs)
5146 // return lhs = lhs / rhs;
5149 // RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs)
5151 // return lhs = lhs % rhs;
5154 RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs)
5156 return lhs = lhs & rhs;
5159 RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs)
5161 return lhs = lhs | rhs;
5164 RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs)
5166 return lhs = lhs ^ rhs;
5169 RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
5171 return lhs = lhs << rhs;
5174 RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
5176 return lhs = lhs >> rhs;
5179 RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs)
5181 return lhs = lhs << rhs;
5184 RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs)
5186 return lhs = lhs >> rhs;
5189 // RValue<UInt2> operator+(RValue<UInt2> val)
5194 // RValue<UInt2> operator-(RValue<UInt2> val)
5196 // return RValue<UInt2>(Nucleus::createNeg(val.value));
5199 RValue<UInt2> operator~(RValue<UInt2> val)
5201 if(CPUID::supportsMMX2())
5203 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
5207 return RValue<UInt2>(Nucleus::createNot(val.value));
5211 Type *UInt2::getType()
5213 if(CPUID::supportsMMX2())
5215 return MMX::getType();
5219 return T(VectorType::get(UInt::getType(), 2));
5223 Int4::Int4(RValue<Byte4> cast)
5225 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5226 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
5230 if (CPUID::supportsSSE4_1())
5232 e = x86::pmovzxbd(RValue<Int4>(a)).value;
5236 Constant *swizzle[16];
5237 swizzle[0] = Nucleus::createConstantInt(0);
5238 swizzle[1] = Nucleus::createConstantInt(16);
5239 swizzle[2] = Nucleus::createConstantInt(1);
5240 swizzle[3] = Nucleus::createConstantInt(17);
5241 swizzle[4] = Nucleus::createConstantInt(2);
5242 swizzle[5] = Nucleus::createConstantInt(18);
5243 swizzle[6] = Nucleus::createConstantInt(3);
5244 swizzle[7] = Nucleus::createConstantInt(19);
5245 swizzle[8] = Nucleus::createConstantInt(4);
5246 swizzle[9] = Nucleus::createConstantInt(20);
5247 swizzle[10] = Nucleus::createConstantInt(5);
5248 swizzle[11] = Nucleus::createConstantInt(21);
5249 swizzle[12] = Nucleus::createConstantInt(6);
5250 swizzle[13] = Nucleus::createConstantInt(22);
5251 swizzle[14] = Nucleus::createConstantInt(7);
5252 swizzle[15] = Nucleus::createConstantInt(23);
5254 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5255 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), V(Nucleus::createConstantVector(swizzle, 16)));
5257 Constant *swizzle2[8];
5258 swizzle2[0] = Nucleus::createConstantInt(0);
5259 swizzle2[1] = Nucleus::createConstantInt(8);
5260 swizzle2[2] = Nucleus::createConstantInt(1);
5261 swizzle2[3] = Nucleus::createConstantInt(9);
5262 swizzle2[4] = Nucleus::createConstantInt(2);
5263 swizzle2[5] = Nucleus::createConstantInt(10);
5264 swizzle2[6] = Nucleus::createConstantInt(3);
5265 swizzle2[7] = Nucleus::createConstantInt(11);
5267 Value *d = Nucleus::createBitCast(c, Short8::getType());
5268 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), V(Nucleus::createConstantVector(swizzle2, 8)));
5271 Value *f = Nucleus::createBitCast(e, Int4::getType());
5275 Int4::Int4(RValue<SByte4> cast)
5277 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5278 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
5282 if (CPUID::supportsSSE4_1())
5284 g = x86::pmovsxbd(RValue<Int4>(a)).value;
5288 Constant *swizzle[16];
5289 swizzle[0] = Nucleus::createConstantInt(0);
5290 swizzle[1] = Nucleus::createConstantInt(0);
5291 swizzle[2] = Nucleus::createConstantInt(1);
5292 swizzle[3] = Nucleus::createConstantInt(1);
5293 swizzle[4] = Nucleus::createConstantInt(2);
5294 swizzle[5] = Nucleus::createConstantInt(2);
5295 swizzle[6] = Nucleus::createConstantInt(3);
5296 swizzle[7] = Nucleus::createConstantInt(3);
5297 swizzle[8] = Nucleus::createConstantInt(4);
5298 swizzle[9] = Nucleus::createConstantInt(4);
5299 swizzle[10] = Nucleus::createConstantInt(5);
5300 swizzle[11] = Nucleus::createConstantInt(5);
5301 swizzle[12] = Nucleus::createConstantInt(6);
5302 swizzle[13] = Nucleus::createConstantInt(6);
5303 swizzle[14] = Nucleus::createConstantInt(7);
5304 swizzle[15] = Nucleus::createConstantInt(7);
5306 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5307 Value *c = Nucleus::createShuffleVector(b, b, V(Nucleus::createConstantVector(swizzle, 16)));
5309 Constant *swizzle2[8];
5310 swizzle2[0] = Nucleus::createConstantInt(0);
5311 swizzle2[1] = Nucleus::createConstantInt(0);
5312 swizzle2[2] = Nucleus::createConstantInt(1);
5313 swizzle2[3] = Nucleus::createConstantInt(1);
5314 swizzle2[4] = Nucleus::createConstantInt(2);
5315 swizzle2[5] = Nucleus::createConstantInt(2);
5316 swizzle2[6] = Nucleus::createConstantInt(3);
5317 swizzle2[7] = Nucleus::createConstantInt(3);
5319 Value *d = Nucleus::createBitCast(c, Short8::getType());
5320 Value *e = Nucleus::createShuffleVector(d, d, V(Nucleus::createConstantVector(swizzle2, 8)));
5322 Value *f = Nucleus::createBitCast(e, Int4::getType());
5323 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
5324 g = x86::psrad(RValue<Int4>(f), 24).value;
5330 Int4::Int4(RValue<Float4> cast)
5332 // xyzw.parent = this;
5334 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
5339 Int4::Int4(RValue<Short4> cast)
5341 Value *long2 = V(UndefValue::get(Long2::getType()));
5342 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5343 long2 = Nucleus::createInsertElement(long2, element, 0);
5344 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5346 if(CPUID::supportsSSE4_1())
5348 storeValue(x86::pmovsxwd(vector).value);
5352 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5354 Constant *swizzle[8];
5355 swizzle[0] = Nucleus::createConstantInt(0);
5356 swizzle[1] = Nucleus::createConstantInt(0);
5357 swizzle[2] = Nucleus::createConstantInt(1);
5358 swizzle[3] = Nucleus::createConstantInt(1);
5359 swizzle[4] = Nucleus::createConstantInt(2);
5360 swizzle[5] = Nucleus::createConstantInt(2);
5361 swizzle[6] = Nucleus::createConstantInt(3);
5362 swizzle[7] = Nucleus::createConstantInt(3);
5364 Value *c = Nucleus::createShuffleVector(b, b, V(Nucleus::createConstantVector(swizzle, 8)));
5365 Value *d = Nucleus::createBitCast(c, Int4::getType());
5368 // Each Short is packed into each Int in the (Short | Short) format.
5369 // Shifting by 16 will retrieve the original Short value.
5370 // Shitfing an Int will propagate the sign bit, which will work
5371 // for both positive and negative values of a Short.
5376 Int4::Int4(RValue<UShort4> cast)
5378 Value *long2 = V(UndefValue::get(Long2::getType()));
5379 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5380 long2 = Nucleus::createInsertElement(long2, element, 0);
5381 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5383 if(CPUID::supportsSSE4_1())
5385 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
5389 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5391 Constant *swizzle[8];
5392 swizzle[0] = Nucleus::createConstantInt(0);
5393 swizzle[1] = Nucleus::createConstantInt(8);
5394 swizzle[2] = Nucleus::createConstantInt(1);
5395 swizzle[3] = Nucleus::createConstantInt(9);
5396 swizzle[4] = Nucleus::createConstantInt(2);
5397 swizzle[5] = Nucleus::createConstantInt(10);
5398 swizzle[6] = Nucleus::createConstantInt(3);
5399 swizzle[7] = Nucleus::createConstantInt(11);
5401 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), V(Nucleus::createConstantVector(swizzle, 8)));
5402 Value *d = Nucleus::createBitCast(c, Int4::getType());
5409 // xyzw.parent = this;
5412 Int4::Int4(int xyzw)
5414 constant(xyzw, xyzw, xyzw, xyzw);
5417 Int4::Int4(int x, int yzw)
5419 constant(x, yzw, yzw, yzw);
5422 Int4::Int4(int x, int y, int zw)
5424 constant(x, y, zw, zw);
5427 Int4::Int4(int x, int y, int z, int w)
5429 constant(x, y, z, w);
5432 void Int4::constant(int x, int y, int z, int w)
5434 // xyzw.parent = this;
5436 Constant *constantVector[4];
5437 constantVector[0] = Nucleus::createConstantInt(x);
5438 constantVector[1] = Nucleus::createConstantInt(y);
5439 constantVector[2] = Nucleus::createConstantInt(z);
5440 constantVector[3] = Nucleus::createConstantInt(w);
5442 storeValue(Nucleus::createConstantVector(constantVector, 4));
5445 Int4::Int4(RValue<Int4> rhs)
5447 // xyzw.parent = this;
5449 storeValue(rhs.value);
5452 Int4::Int4(const Int4 &rhs)
5454 // xyzw.parent = this;
5456 Value *value = rhs.loadValue();
5460 Int4::Int4(const Reference<Int4> &rhs)
5462 // xyzw.parent = this;
5464 Value *value = rhs.loadValue();
5468 Int4::Int4(RValue<UInt4> rhs)
5470 // xyzw.parent = this;
5472 storeValue(rhs.value);
5475 Int4::Int4(const UInt4 &rhs)
5477 // xyzw.parent = this;
5479 Value *value = rhs.loadValue();
5483 Int4::Int4(const Reference<UInt4> &rhs)
5485 // xyzw.parent = this;
5487 Value *value = rhs.loadValue();
5491 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5493 // xyzw.parent = this;
5495 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5496 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
5498 Value *long2 = V(UndefValue::get(Long2::getType()));
5499 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5500 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5501 Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
5506 Int4::Int4(RValue<Int> rhs)
5508 // xyzw.parent = this;
5510 Value *vector = loadValue();
5511 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
5513 Constant *swizzle[4];
5514 swizzle[0] = Nucleus::createConstantInt(0);
5515 swizzle[1] = Nucleus::createConstantInt(0);
5516 swizzle[2] = Nucleus::createConstantInt(0);
5517 swizzle[3] = Nucleus::createConstantInt(0);
5519 Value *replicate = Nucleus::createShuffleVector(insert, V(UndefValue::get(Int4::getType())), V(Nucleus::createConstantVector(swizzle, 4)));
5521 storeValue(replicate);
5524 Int4::Int4(const Int &rhs)
5526 // xyzw.parent = this;
5528 *this = RValue<Int>(rhs.loadValue());
5531 Int4::Int4(const Reference<Int> &rhs)
5533 // xyzw.parent = this;
5535 *this = RValue<Int>(rhs.loadValue());
5538 RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
5540 storeValue(rhs.value);
5545 RValue<Int4> Int4::operator=(const Int4 &rhs) const
5547 Value *value = rhs.loadValue();
5550 return RValue<Int4>(value);
5553 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
5555 Value *value = rhs.loadValue();
5558 return RValue<Int4>(value);
5561 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5563 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5566 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5568 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5571 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5573 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5576 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5578 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5581 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5583 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5586 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5588 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5591 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5593 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5596 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5598 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
5601 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5603 return x86::pslld(lhs, rhs);
5606 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5608 return x86::psrad(lhs, rhs);
5611 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5613 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5616 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5618 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
5621 RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
5623 return lhs = lhs + rhs;
5626 RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
5628 return lhs = lhs - rhs;
5631 RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
5633 return lhs = lhs * rhs;
5636 // RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
5638 // return lhs = lhs / rhs;
5641 // RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
5643 // return lhs = lhs % rhs;
5646 RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
5648 return lhs = lhs & rhs;
5651 RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
5653 return lhs = lhs | rhs;
5656 RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
5658 return lhs = lhs ^ rhs;
5661 RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
5663 return lhs = lhs << rhs;
5666 RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
5668 return lhs = lhs >> rhs;
5671 RValue<Int4> operator+(RValue<Int4> val)
5676 RValue<Int4> operator-(RValue<Int4> val)
5678 return RValue<Int4>(Nucleus::createNeg(val.value));
5681 RValue<Int4> operator~(RValue<Int4> val)
5683 return RValue<Int4>(Nucleus::createNot(val.value));
// Signed comparisons of two Int4 values. Each function builds an integer
// compare of x and y and sign-extends its result to Int4's type.
// CmpEQ, CmpLE and CmpNLT are written as the opposite comparison XORed with
// 0xFFFFFFFF, as a workaround for the LLVM issue described in the FIXME notes.

// Equal: complement of the "not equal" compare.
5686 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5688 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5689 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5690 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5691 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Less than (signed).
5694 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5696 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
// Less than or equal: complement of the signed "greater than" compare.
5699 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5701 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5702 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5703 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5704 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Not equal.
5707 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5709 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// Not less than: complement of the signed "less than" compare.
5712 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5714 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5715 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5716 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5717 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Not less than or equal, i.e. signed "greater than".
5720 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5722 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
// Maximum of two Int4 values. When CPUID reports SSE4.1 the result comes from
// x86::pmaxsd; the remaining statements take x where CmpNLE(x, y) is set and
// y elsewhere ('&' binds tighter than '|').
// NOTE(review): the else/brace lines separating the two paths are not visible in this extract.
5725 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5727 if(CPUID::supportsSSE4_1())
5729 return x86::pmaxsd(x, y);
5733 RValue<Int4> greater = CmpNLE(x, y);
5734 return x & greater | y & ~greater;
// Minimum of two Int4 values. Same structure as Max, using x86::pminsd and the
// CmpLT(x, y) mask.
5738 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5740 if(CPUID::supportsSSE4_1())
5742 return x86::pminsd(x, y);
5746 RValue<Int4> less = CmpLT(x, y);
5747 return x & less | y & ~less;
// Converts a Float4 to an Int4 through x86::cvtps2dq.
5751 RValue<Int4> RoundInt(RValue<Float4> cast)
5753 return x86::cvtps2dq(cast);
// Packs two Int4 values into one Short8 through x86::packssdw.
5756 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5758 return x86::packssdw(x, y);
// Reads element i of x as an Int.
5761 RValue<Int> Extract(RValue<Int4> x, int i)
5763 return RValue<Int>(Nucleus::createExtractElement(x.value, i));
// Returns the result of inserting element at index i of x.
5766 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5768 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
// Applies x86::movmskps to x reinterpreted as a Float4.
5771 RValue<Int> SignMask(RValue<Int4> x)
5773 return x86::movmskps(As<Float4>(x));
// Forwards x and the select code to Nucleus::createSwizzle.
5776 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5778 return RValue<Int4>(Nucleus::createSwizzle(x.value, select));
// Type of Int4: a vector of four Int elements.
5781 Type *Int4::getType()
5783 return T(VectorType::get(Int::getType(), 4));
// Constructs a UInt4 from a Float4.
// The conversion is built from signed Int4 conversions instead of
// createFPToUI (see the note below): lanes at or above 2^31 have 0x80000000
// subtracted before the conversion and added back afterwards, other lanes are
// converted directly, and lanes with a negative input are stored as 0.
5786 UInt4::UInt4(RValue<Float4> cast)
5788 // xyzw.parent = this;
5790 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
5791 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5793 // Smallest positive value representable in UInt, but not in Int
5794 const unsigned int ustart = 0x80000000u;
5795 const float ustartf = float(ustart);
5797 // Check if the value can be represented as an Int
// uiValue first holds the CmpNLT(cast, ustartf) mask and is then reused for the merged result.
5798 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5799 // If the value is too large, subtract ustart and re-add it after conversion.
5800 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5801 // Otherwise, just convert normally
5802 (~uiValue & Int4(cast));
5803 // If the value is negative, store 0, otherwise store the result of the conversion
5804 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
// NOTE(review): the comment line below is all that remains of a definition
// whose header is not visible in this extract.
5809 // xyzw.parent = this;
// Constant constructors. Each forwards to constant(); when fewer than four
// values are given, the last one is repeated for the remaining elements.
5812 UInt4::UInt4(int xyzw)
5814 constant(xyzw, xyzw, xyzw, xyzw);
5817 UInt4::UInt4(int x, int yzw)
5819 constant(x, yzw, yzw, yzw);
5822 UInt4::UInt4(int x, int y, int zw)
5824 constant(x, y, zw, zw);
5827 UInt4::UInt4(int x, int y, int z, int w)
5829 constant(x, y, z, w);
// Builds a four-element constant vector from x, y, z, w (in that order) and
// stores it as this variable's value.
5832 void UInt4::constant(int x, int y, int z, int w)
5834 // xyzw.parent = this;
5836 Constant *constantVector[4];
5837 constantVector[0] = Nucleus::createConstantInt(x);
5838 constantVector[1] = Nucleus::createConstantInt(y);
5839 constantVector[2] = Nucleus::createConstantInt(z);
5840 constantVector[3] = Nucleus::createConstantInt(w);
5842 storeValue(Nucleus::createConstantVector(constantVector, 4));
// Copy and conversion constructors from UInt4 and Int4 sources.
// The RValue overloads store rhs.value directly; the variable and Reference
// overloads first load the value from rhs.
// NOTE(review): in the overloads that load a value, the statement that
// consumes 'value' is not visible in this extract.
5845 UInt4::UInt4(RValue<UInt4> rhs)
5847 // xyzw.parent = this;
5849 storeValue(rhs.value);
5852 UInt4::UInt4(const UInt4 &rhs)
5854 // xyzw.parent = this;
5856 Value *value = rhs.loadValue();
5860 UInt4::UInt4(const Reference<UInt4> &rhs)
5862 // xyzw.parent = this;
5864 Value *value = rhs.loadValue();
// Int4 sources are taken over as-is: the Int4 value is stored without a conversion step.
5868 UInt4::UInt4(RValue<Int4> rhs)
5870 // xyzw.parent = this;
5872 storeValue(rhs.value);
5875 UInt4::UInt4(const Int4 &rhs)
5877 // xyzw.parent = this;
5879 Value *value = rhs.loadValue();
5883 UInt4::UInt4(const Reference<Int4> &rhs)
5885 // xyzw.parent = this;
5887 Value *value = rhs.loadValue();
// Builds a UInt4 from two UInt2 halves: lo and hi are each bitcast to Long,
// inserted as elements 0 and 1 of a Long2, and that Long2 is bitcast to
// Int4's type.
// NOTE(review): the statement that consumes 'uint4' is not visible in this extract.
5891 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
5893 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5894 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
// Start from an undefined Long2; both of its elements are overwritten below.
5896 Value *long2 = V(UndefValue::get(Long2::getType()));
5897 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5898 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5899 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
// Assignment operators. They are const member functions.
// NOTE(review): several statements of these definitions (the return of the
// first overload, and whatever consumes 'value' in the other two) are not
// visible in this extract.
5904 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
5906 storeValue(rhs.value);
// Assignment from another UInt4 variable: loads its value and returns it as an RValue.
5911 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
5913 Value *value = rhs.loadValue();
5916 return RValue<UInt4>(value);
// Assignment from a Reference<UInt4>: same pattern as above.
5919 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
5921 Value *value = rhs.loadValue();
5924 return RValue<UInt4>(value);
// Binary operators on UInt4 values. Each forwards both operand values to the
// matching Nucleus builder and wraps the result.
5927 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5929 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5932 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5934 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5937 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5939 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
// Division and remainder use the unsigned builders (UDiv / URem).
5942 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5944 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5947 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5949 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5952 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5954 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5957 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5959 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5962 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5964 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift left by an unsigned char count: lhs is reinterpreted as Int4 for
// x86::pslld and the result is reinterpreted back to UInt4.
5967 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5969 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
// Shift right by an unsigned char count through x86::psrld.
5972 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5974 return x86::psrld(lhs, rhs);
// Shifts by a UInt4 count use createShl / createLShr.
5977 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5979 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5982 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5984 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for UInt4. Each one evaluates the matching
// binary operator on lhs and rhs, assigns the result to lhs and returns the
// value of that assignment.
5987 RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
5989 return lhs = lhs + rhs;
5992 RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
5994 return lhs = lhs - rhs;
5997 RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
5999 return lhs = lhs * rhs;
// Division and remainder assignment are disabled; only the commented-out code remains.
6002 // RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
6004 // return lhs = lhs / rhs;
6007 // RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
6009 // return lhs = lhs % rhs;
6012 RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
6014 return lhs = lhs & rhs;
6017 RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
6019 return lhs = lhs | rhs;
6022 RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
6024 return lhs = lhs ^ rhs;
// The shift assignments take the shift count as a plain unsigned char.
6027 RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
6029 return lhs = lhs << rhs;
6032 RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
6034 return lhs = lhs >> rhs;
// Unary plus. NOTE(review): the body of this definition is not visible in this extract.
6037 RValue<UInt4> operator+(RValue<UInt4> val)
// Unary minus: negates val through Nucleus::createNeg.
6042 RValue<UInt4> operator-(RValue<UInt4> val)
6044 return RValue<UInt4>(Nucleus::createNeg(val.value));
// Bitwise complement of val through Nucleus::createNot.
6047 RValue<UInt4> operator~(RValue<UInt4> val)
6049 return RValue<UInt4>(Nucleus::createNot(val.value));
// Unsigned comparisons of two UInt4 values. Each function builds an integer
// compare of x and y and sign-extends its result to Int4's type.
// CmpEQ, CmpLE and CmpNLT are written as the opposite comparison XORed with
// 0xFFFFFFFF, as a workaround for the LLVM issue described in the FIXME notes.

// Equal: complement of the "not equal" compare.
6052 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
6054 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6055 // Restore the following line when LLVM is updated to a version where this issue is fixed.
6056 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
6057 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Less than (unsigned).
6060 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
6062 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
// Less than or equal: complement of the unsigned "greater than" compare.
6065 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
6067 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6068 // Restore the following line when LLVM is updated to a version where this issue is fixed.
6069 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
6070 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Not equal.
6073 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
6075 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// Not less than: complement of the unsigned "less than" compare.
6078 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
6080 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
6081 // Restore the following line when LLVM is updated to a version where this issue is fixed.
6082 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
6083 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Not less than or equal, i.e. unsigned "greater than".
6086 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
6088 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
// Maximum of two UInt4 values. When CPUID reports SSE4.1 the result comes
// from x86::pmaxud; the remaining statements take x where CmpNLE(x, y) is set
// and y elsewhere ('&' binds tighter than '|').
// NOTE(review): the else/brace lines separating the two paths are not visible in this extract.
6091 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
6093 if(CPUID::supportsSSE4_1())
6095 return x86::pmaxud(x, y);
6099 RValue<UInt4> greater = CmpNLE(x, y);
6100 return x & greater | y & ~greater;
// Minimum of two UInt4 values. Same structure as Max, using x86::pminud and
// the CmpLT(x, y) mask.
6104 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
6106 if(CPUID::supportsSSE4_1())
6108 return x86::pminud(x, y);
6112 RValue<UInt4> less = CmpLT(x, y);
6113 return x & less | y & ~less;
// Packs two UInt4 values into one UShort8 through x86::packusdw.
// Unlike Max/Min above there is no CPUID check here; the FIXME records that
// a fallback path is still missing.
6117 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
6119 return x86::packusdw(x, y); // FIXME: Fallback required
// Type of UInt4: a vector of four UInt elements.
6122 Type *UInt4::getType()
6124 return T(VectorType::get(UInt::getType(), 4));
// Constructs a Float from an Int using a signed int-to-float conversion.
// Despite its name, 'integer' holds the converted floating-point value.
6127 Float::Float(RValue<Int> cast)
6129 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
6131 storeValue(integer);
// Constructs a Float holding the constant x.
6139 Float::Float(float x)
6141 storeValue(Nucleus::createConstantFloat(x));
// Constructs a Float from an existing Float value.
6144 Float::Float(RValue<Float> rhs)
6146 storeValue(rhs.value);
// Copy constructors from a Float variable and from a Reference<Float>.
// NOTE(review): the statement that consumes 'value' is not visible in this extract.
6149 Float::Float(const Float &rhs)
6151 Value *value = rhs.loadValue();
6155 Float::Float(const Reference<Float> &rhs)
6157 Value *value = rhs.loadValue();
// Assignment operators. They are const member functions.
// NOTE(review): several statements of these definitions (the return of the
// first overload, and whatever consumes 'value' in the other two) are not
// visible in this extract.
6161 RValue<Float> Float::operator=(RValue<Float> rhs) const
6163 storeValue(rhs.value);
// Assignment from another Float variable: loads its value and returns it as an RValue.
6168 RValue<Float> Float::operator=(const Float &rhs) const
6170 Value *value = rhs.loadValue();
6173 return RValue<Float>(value);
// Assignment from a Reference<Float>: same pattern as above.
6176 RValue<Float> Float::operator=(const Reference<Float> &rhs) const
6178 Value *value = rhs.loadValue();
6181 return RValue<Float>(value);
// Scalar Float arithmetic. Each binary operator forwards both operand values
// to the matching floating-point Nucleus builder.
6184 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
6186 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
6189 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
6191 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
6194 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
6196 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
6199 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
6201 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
// Compound assignments: evaluate the binary operator, assign the result to
// lhs and return the value of that assignment.
6204 RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
6206 return lhs = lhs + rhs;
6209 RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
6211 return lhs = lhs - rhs;
6214 RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
6216 return lhs = lhs * rhs;
6219 RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
6221 return lhs = lhs / rhs;
// Unary plus. NOTE(review): the body of this definition is not visible in this extract.
6224 RValue<Float> operator+(RValue<Float> val)
// Unary minus: negates val through Nucleus::createFNeg.
6229 RValue<Float> operator-(RValue<Float> val)
6231 return RValue<Float>(Nucleus::createFNeg(val.value));
// Scalar Float comparisons returning Bool. All six use the ordered (FCmpO*)
// compare builders.
// NOTE(review): LLVM ordered comparisons are false when either operand is
// NaN, so operator!= presumably yields false for NaN operands, unlike C++'s
// built-in '!='. Confirm that callers expect this.
6234 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
6236 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6239 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6241 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6244 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6246 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6249 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6251 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6254 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6256 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6259 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6261 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
6264 RValue<Float> Abs(RValue<Float> x)
6266 return IfThenElse(x > 0.0f, x, -x);
// Returns x when x > y, otherwise y.
6269 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6271 return IfThenElse(x > y, x, y);
// Returns x when x < y, otherwise y.
6274 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6276 return IfThenElse(x < y, x, y);
// Approximate reciprocal of x based on x86::rcpss.
// Two variants are visible: the first scales the rcpss result by a constant,
// the second returns it unchanged. The constant is 1.0f divided by what
// _mm_rcp_ss yields for 1.0f, evaluated in C++ when this function runs and
// embedded as a Float constant.
// NOTE(review): the condition choosing between the two returns (presumably
// exactAtPow2) is not visible in this extract.
6279 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6283 // rcpss uses a piecewise-linear approximation which minimizes the relative error
6284 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6285 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6289 return x86::rcpss(x);
// Reciprocal square root of x through x86::rsqrtss.
6293 RValue<Float> RcpSqrt_pp(RValue<Float> x)
6295 return x86::rsqrtss(x);
// Square root of x through x86::sqrtss.
6298 RValue<Float> Sqrt(RValue<Float> x)
6300 return x86::sqrtss(x);
// Scalar rounding helpers. Each has an SSE4.1 path built on x86::roundss /
// floorss / ceilss and a second return statement that avoids those
// instructions.
// NOTE(review): the else/brace lines separating the two paths are not visible in this extract.

// Round: roundss with immediate 0, or the .x element of the Float4 Round.
6303 RValue<Float> Round(RValue<Float> x)
6305 if(CPUID::supportsSSE4_1())
6307 return x86::roundss(x, 0);
6311 return Float4(Round(Float4(x))).x;
// Trunc: roundss with immediate 3, or a round trip through Int.
6315 RValue<Float> Trunc(RValue<Float> x)
6317 if(CPUID::supportsSSE4_1())
6319 return x86::roundss(x, 3);
6323 return Float(Int(x)); // Rounded toward zero
// Frac: x minus floorss(x), or the .x element of the Float4 Frac.
6327 RValue<Float> Frac(RValue<Float> x)
6329 if(CPUID::supportsSSE4_1())
6331 return x - x86::floorss(x);
6335 return Float4(Frac(Float4(x))).x;
// Floor: floorss, or the .x element of the Float4 Floor.
6339 RValue<Float> Floor(RValue<Float> x)
6341 if(CPUID::supportsSSE4_1())
6343 return x86::floorss(x);
6347 return Float4(Floor(Float4(x))).x;
// Ceil: ceilss, or the .x element of the Float4 Ceil.
6351 RValue<Float> Ceil(RValue<Float> x)
6353 if(CPUID::supportsSSE4_1())
6355 return x86::ceilss(x);
6359 return Float4(Ceil(Float4(x))).x;
// Type of Float: LLVM's float type obtained from the file-level ::context.
6363 Type *Float::getType()
6365 return T(llvm::Type::getFloatTy(*::context));
// Constructs a Float2 from a Float4: the Float4 is bitcast to Long2, element
// 0 of that vector is extracted, and the extracted value is bitcast to
// Float2's type.
// NOTE(review): the statement that consumes 'float2' is not visible in this extract.
6368 Float2::Float2(RValue<Float4> cast)
6370 // xyzw.parent = this;
6372 Value *int64x2 = Nucleus::createBitCast(cast.value, Long2::getType());
6373 Value *int64 = Nucleus::createExtractElement(int64x2, 0);
6374 Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
// Type of Float2: a vector of two Float elements.
6379 Type *Float2::getType()
6381 return T(VectorType::get(Float::getType(), 2));
// Constructs a Float4 from a Byte4.
// Three alternative implementations appear below; 'xyzw' is declared in each
// of them, so only one can be compiled at a time.
// NOTE(review): the preprocessor lines selecting the active alternative, and
// the final store, are not visible in this extract.
6384 Float4::Float4(RValue<Byte4> cast)
// Alternative 1: direct vector unsigned-int-to-float conversion (marked as crashing).
6389 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: convert the four elements one at a time and insert each
// result into the vector loaded from this variable.
6391 Value *vector = loadValue();
6393 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6394 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
6395 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6397 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6398 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
6399 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6401 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6402 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
6403 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6405 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6406 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
6407 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: convert the Byte4 to an Int4 first, then use the signed
// int-to-float conversion on that Int4.
6409 Value *a = Int4(cast).loadValue();
6410 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Constructs a Float4 from an SByte4.
// Three alternative implementations appear below; 'xyzw' is declared in each
// of them, so only one can be compiled at a time.
// NOTE(review): the preprocessor lines selecting the active alternative, and
// the final store, are not visible in this extract.
6416 Float4::Float4(RValue<SByte4> cast)
// Alternative 1: direct vector signed-int-to-float conversion (marked as crashing).
6421 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: convert the four elements one at a time and insert each
// result into the vector loaded from this variable.
6423 Value *vector = loadValue();
6425 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6426 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
6427 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6429 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6430 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
6431 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6433 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6434 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
6435 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6437 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6438 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
6439 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: convert the SByte4 to an Int4 first, then use the signed
// int-to-float conversion on that Int4.
6441 Value *a = Int4(cast).loadValue();
6442 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Conversion constructors from integer vectors.
// NOTE(review): in the Short4/UShort4 overloads 'c' is declared on a line
// that is not visible in this extract; in the Int4/UInt4 overloads the
// statement that consumes 'xyzw' is not visible either.

// From Short4: signed int-to-float conversion of 'c' viewed as an Int4.
6448 Float4::Float4(RValue<Short4> cast)
6453 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// From UShort4: same conversion call as the Short4 overload.
6456 Float4::Float4(RValue<UShort4> cast)
6461 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// From Int4: signed int-to-float conversion.
6464 Float4::Float4(RValue<Int4> cast)
6468 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
// From UInt4: unsigned int-to-float conversion.
6473 Float4::Float4(RValue<UInt4> cast)
6477 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());
// Constant constructors. Each forwards to constant(); when fewer than four
// values are given, the last one is repeated for the remaining elements.
6487 Float4::Float4(float xyzw)
6489 constant(xyzw, xyzw, xyzw, xyzw);
6492 Float4::Float4(float x, float yzw)
6494 constant(x, yzw, yzw, yzw);
6497 Float4::Float4(float x, float y, float zw)
6499 constant(x, y, zw, zw);
6502 Float4::Float4(float x, float y, float z, float w)
6504 constant(x, y, z, w);
// Builds a four-element constant vector from x, y, z, w (in that order) and
// stores it as this variable's value.
6507 void Float4::constant(float x, float y, float z, float w)
6511 Constant *constantVector[4];
6512 constantVector[0] = Nucleus::createConstantFloat(x);
6513 constantVector[1] = Nucleus::createConstantFloat(y);
6514 constantVector[2] = Nucleus::createConstantFloat(z);
6515 constantVector[3] = Nucleus::createConstantFloat(w);
6517 storeValue(Nucleus::createConstantVector(constantVector, 4));
// Copy constructors. The RValue overload stores rhs.value directly; the
// variable and Reference overloads first load the value from rhs.
// NOTE(review): the statement that consumes 'value' is not visible in this extract.
6520 Float4::Float4(RValue<Float4> rhs)
6524 storeValue(rhs.value);
6527 Float4::Float4(const Float4 &rhs)
6531 Value *value = rhs.loadValue();
6535 Float4::Float4(const Reference<Float4> &rhs)
6539 Value *value = rhs.loadValue();
// Constructs a Float4 with all four elements equal to the scalar rhs.
// rhs is inserted as element 0 of the vector loaded from this variable, and a
// shuffle whose four indices are all 0 copies that element to every position.
6543 Float4::Float4(RValue<Float> rhs)
6547 Value *vector = loadValue();
6548 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
6550 Constant *swizzle[4];
6551 swizzle[0] = Nucleus::createConstantInt(0);
6552 swizzle[1] = Nucleus::createConstantInt(0);
6553 swizzle[2] = Nucleus::createConstantInt(0);
6554 swizzle[3] = Nucleus::createConstantInt(0);
// The second shuffle operand is undefined; no index above refers to it.
6556 Value *replicate = Nucleus::createShuffleVector(insert, V(UndefValue::get(Float4::getType())), V(Nucleus::createConstantVector(swizzle, 4)));
6558 storeValue(replicate);
// From a Float variable: loads its value and assigns it through operator=(RValue<Float>).
6561 Float4::Float4(const Float &rhs)
6565 *this = RValue<Float>(rhs.loadValue());
// From a Reference<Float>: same pattern as above.
6568 Float4::Float4(const Reference<Float> &rhs)
6572 *this = RValue<Float>(rhs.loadValue());
// Assignment operators. They are const member functions.

// Assigns the constant x to all four elements.
6575 RValue<Float4> Float4::operator=(float x) const
6577 return *this = Float4(x, x, x, x);
// Stores rhs. NOTE(review): the return statement is not visible in this extract.
6580 RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
6582 storeValue(rhs.value);
// From a Float4 variable or Reference: loads the value and returns it.
// NOTE(review): the statement between the load and the return is not visible in this extract.
6587 RValue<Float4> Float4::operator=(const Float4 &rhs) const
6589 Value *value = rhs.loadValue();
6592 return RValue<Float4>(value);
6595 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
6597 Value *value = rhs.loadValue();
6600 return RValue<Float4>(value);
// Scalar sources: build a temporary Float4 from rhs and assign that.
6603 RValue<Float4> Float4::operator=(RValue<Float> rhs) const
6605 return *this = Float4(rhs);
6608 RValue<Float4> Float4::operator=(const Float &rhs) const
6610 return *this = Float4(rhs);
6613 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
6615 return *this = Float4(rhs);
// Float4 arithmetic. Each binary operator forwards both operand values to the
// matching floating-point Nucleus builder.
6618 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6620 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6623 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6625 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6628 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6630 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6633 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6635 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6638 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6640 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
// Compound assignments: evaluate the binary operator, assign the result to
// lhs and return the value of that assignment.
6643 RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
6645 return lhs = lhs + rhs;
6648 RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
6650 return lhs = lhs - rhs;
6653 RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
6655 return lhs = lhs * rhs;
6658 RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
6660 return lhs = lhs / rhs;
6663 RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
6665 return lhs = lhs % rhs;
// Unary plus. NOTE(review): the body of this definition is not visible in this extract.
6668 RValue<Float4> operator+(RValue<Float4> val)
// Unary minus: negates val through Nucleus::createFNeg.
6673 RValue<Float4> operator-(RValue<Float4> val)
6675 return RValue<Float4>(Nucleus::createFNeg(val.value));
6678 RValue<Float4> Abs(RValue<Float4> x)
6680 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6682 Constant *constantVector[4];
6683 constantVector[0] = Nucleus::createConstantInt(0x7FFFFFFF);
6684 constantVector[1] = Nucleus::createConstantInt(0x7FFFFFFF);
6685 constantVector[2] = Nucleus::createConstantInt(0x7FFFFFFF);
6686 constantVector[3] = Nucleus::createConstantInt(0x7FFFFFFF);
6688 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, 4)));
6690 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
// Maximum of x and y through x86::maxps.
6693 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6695 return x86::maxps(x, y);
// Minimum of x and y through x86::minps.
6698 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6700 return x86::minps(x, y);
// Approximate reciprocal of x based on x86::rcpps.
// Two variants are visible: the first scales the rcpps result by a constant,
// the second returns it unchanged. The constant is 1.0f divided by what
// _mm_rcp_ss yields for 1.0f, evaluated in C++ when this function runs and
// embedded as a Float4 constant.
// NOTE(review): the condition choosing between the two returns (presumably
// exactAtPow2) is not visible in this extract.
6703 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6707 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6708 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6709 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6713 return x86::rcpps(x);
// Reciprocal square root of x through x86::rsqrtps.
6717 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6719 return x86::rsqrtps(x);
// Square root of x through x86::sqrtps.
6722 RValue<Float4> Sqrt(RValue<Float4> x)
6724 return x86::sqrtps(x);
// Inserts element at index i of the variable val and writes the result back
// to val (assignment works through the const reference).
// NOTE(review): the return statement is not visible in this extract.
6727 RValue<Float4> Insert(const Float4 &val, RValue<Float> element, int i)
6729 Value *value = val.loadValue();
6730 Value *insert = Nucleus::createInsertElement(value, element.value, i);
6732 val = RValue<Float4>(insert);
// Reads element i of x as a Float.
6737 RValue<Float> Extract(RValue<Float4> x, int i)
6739 return RValue<Float>(Nucleus::createExtractElement(x.value, i));
// Forwards x and the select code to Nucleus::createSwizzle.
6742 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6744 return RValue<Float4>(Nucleus::createSwizzle(x.value, select));
6747 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6749 Constant *shuffle[4];
6750 shuffle[0] = Nucleus::createConstantInt(((imm >> 0) & 0x03) + 0);
6751 shuffle[1] = Nucleus::createConstantInt(((imm >> 2) & 0x03) + 0);
6752 shuffle[2] = Nucleus::createConstantInt(((imm >> 4) & 0x03) + 4);
6753 shuffle[3] = Nucleus::createConstantInt(((imm >> 6) & 0x03) + 4);
6755 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 4))));
6758 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6760 Constant *shuffle[4];
6761 shuffle[0] = Nucleus::createConstantInt(0);
6762 shuffle[1] = Nucleus::createConstantInt(4);
6763 shuffle[2] = Nucleus::createConstantInt(1);
6764 shuffle[3] = Nucleus::createConstantInt(5);
6766 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 4))));
6769 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6771 Constant *shuffle[4];
6772 shuffle[0] = Nucleus::createConstantInt(2);
6773 shuffle[1] = Nucleus::createConstantInt(6);
6774 shuffle[2] = Nucleus::createConstantInt(3);
6775 shuffle[3] = Nucleus::createConstantInt(7);
6777 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, V(Nucleus::createConstantVector(shuffle, 4))));
6780 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6782 Value *vector = lhs.loadValue();
6783 Value *shuffle = Nucleus::createMask(vector, rhs.value, select);
6784 lhs.storeValue(shuffle);
6786 return RValue<Float4>(shuffle);
// Returns x86::movmskps of x.
6789 RValue<Int> SignMask(RValue<Float4> x)
6791 return x86::movmskps(x);
// Float4 comparisons returning an Int4. Each builds an ordered (FCmpO*)
// floating-point compare and sign-extends its result to Int4's type. The
// commented-out lines show the x86 intrinsic each function corresponds to.
// NOTE(review): CmpNEQ/CmpNLT/CmpNLE use ordered compares (ONE/OGE/OGT);
// whether that matches the NaN behaviour of the x86 cmpneqps/cmpnltps/
// cmpnleps instructions named in the comments should be confirmed.
6794 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6796 // return As<Int4>(x86::cmpeqps(x, y));
6797 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6800 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6802 // return As<Int4>(x86::cmpltps(x, y));
6803 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6806 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6808 // return As<Int4>(x86::cmpleps(x, y));
6809 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6812 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6814 // return As<Int4>(x86::cmpneqps(x, y));
6815 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6818 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6820 // return As<Int4>(x86::cmpnltps(x, y));
6821 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6824 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6826 // return As<Int4>(x86::cmpnleps(x, y));
6827 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
// Float4 rounding helpers. Each has an SSE4.1 path built on x86::roundps /
// floorps / ceilps.
// NOTE(review): the else/brace lines are not visible in this extract, and for
// Floor and Ceil the non-SSE4.1 statements are not visible either.

// Round: roundps with immediate 0, or a round trip through RoundInt.
6830 RValue<Float4> Round(RValue<Float4> x)
6832 if(CPUID::supportsSSE4_1())
6834 return x86::roundps(x, 0);
6838 return Float4(RoundInt(x));
// Trunc: roundps with immediate 3, or a round trip through Int4.
6842 RValue<Float4> Trunc(RValue<Float4> x)
6844 if(CPUID::supportsSSE4_1())
6846 return x86::roundps(x, 3);
6850 return Float4(Int4(x)); // Rounded toward zero
// Frac: x minus floorps(x). The other path subtracts the Int4 round trip of
// x from x and then adds 1.0 to the elements for which 0.0 > frc.
6854 RValue<Float4> Frac(RValue<Float4> x)
6856 if(CPUID::supportsSSE4_1())
6858 return x - x86::floorps(x);
6862 Float4 frc = x - Float4(Int4(x)); // Signed fractional part
// The comparison result is ANDed with the bit pattern of 1.0f, giving 1.0f or 0.0f per element.
6864 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
// Floor: floorps on the SSE4.1 path.
6868 RValue<Float4> Floor(RValue<Float4> x)
6870 if(CPUID::supportsSSE4_1())
6872 return x86::floorps(x);
// Ceil: ceilps on the SSE4.1 path.
6880 RValue<Float4> Ceil(RValue<Float4> x)
6882 if(CPUID::supportsSSE4_1())
6884 return x86::ceilps(x);
// Type of Float4: a vector of four Float elements.
6892 Type *Float4::getType()
6894 return T(VectorType::get(Float::getType(), 4));
// Pointer<Byte> addition. Each overload builds a GEP on lhs with Byte as the
// element type and the offset as the index.
6897 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6899 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
6902 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6904 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
// NOTE(review): the UInt offset is passed to createGEP exactly like the Int
// one; how an offset with the top bit set is interpreted is not visible here.
6907 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6909 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
// Compound forms: add the offset, assign the result to lhs and return the
// value of that assignment.
6912 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
6914 return lhs = lhs + offset;
6917 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset)
6919 return lhs = lhs + offset;
6922 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6924 return lhs = lhs + offset;
// Pointer<Byte> subtraction, implemented as addition of the negated offset.
6927 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6929 return lhs + -offset;
6932 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6934 return lhs + -offset;
// The UInt overload negates the unsigned offset with UInt's unary minus.
6937 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6939 return lhs + -offset;
// Compound forms: subtract the offset, assign the result to lhs and return
// the value of that assignment.
6942 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
6944 return lhs = lhs - offset;
6947 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset)
6949 return lhs = lhs - offset;
6952 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6954 return lhs = lhs - offset;
// NOTE(review): the next three statements belong to a definition whose header
// line is not visible in this extract (presumably the argument-less Return()).
// They emit a void return, make a newly created basic block the insertion
// point, and emit an unreachable instruction there.
6959 Nucleus::createRetVoid();
6960 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6961 Nucleus::createUnreachable();
// Emits a return of the constant boolean ret, then continues in a new basic
// block that starts with an unreachable instruction.
6964 void Return(bool ret)
6966 Nucleus::createRet(V(Nucleus::createConstantBool(ret)));
6967 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6968 Nucleus::createUnreachable();
// Emits a return of the value loaded from the Int variable ret, then
// continues in a new basic block that starts with an unreachable instruction.
6971 void Return(const Int &ret)
6973 Nucleus::createRet(ret.loadValue());
6974 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6975 Nucleus::createUnreachable();
// Creates a new basic block, emits a branch to it from the current insertion
// point and makes it the insertion point.
// NOTE(review): the return statement is not visible in this extract.
6978 BasicBlock *beginLoop()
6980 BasicBlock *loopBB = Nucleus::createBasicBlock();
6982 Nucleus::createBr(loopBB);
6983 Nucleus::setInsertBlock(loopBB);
// Emits a conditional branch on cmp to bodyBB or endBB and makes bodyBB the
// insertion point.
// NOTE(review): the returned bool is produced on a line not visible in this extract.
6988 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6990 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6991 Nucleus::setInsertBlock(bodyBB);
// Erases the last instruction of falseBB and makes falseBB the insertion point.
// NOTE(review): the returned bool is produced on a line not visible in this extract.
6996 bool elseBlock(BasicBlock *falseBB)
6998 falseBB->back().eraseFromParent();
6999 Nucleus::setInsertBlock(falseBB);
7004 RValue<Long> Ticks()
7006 llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter);
7008 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
// Wrappers that emit calls to x86 SSE conversion intrinsics.

// Calls the x86_sse_cvtss2si intrinsic on a Float4 named 'vector'.
// NOTE(review): 'vector' is declared and filled from val on lines that are
// not visible in this extract.
7016 RValue<Int> cvtss2si(RValue<Float> val)
7018 llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si);
7023 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
// Calls the x86_sse_cvtps2pi intrinsic on val, producing an Int2.
7026 RValue<Int2> cvtps2pi(RValue<Float4> val)
7028 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi);
7030 return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
// Calls the x86_sse_cvttps2pi intrinsic on val, producing an Int2.
7033 RValue<Int2> cvttps2pi(RValue<Float4> val)
7035 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi);
7037 return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
// Converts a Float4 to an Int4. With SSE2 this is one x86_sse2_cvtps2dq
// call; the remaining statements convert two Int2 halves with cvtps2pi and
// join them.
// NOTE(review): the else/brace lines separating the two paths are not visible
// in this extract. Swizzle(val, 0xEE) presumably moves the upper two elements
// into the lower positions for the 'hi' half; confirm against createSwizzle.
7040 RValue<Int4> cvtps2dq(RValue<Float4> val)
7042 if(CPUID::supportsSSE2())
7044 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq);
7046 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
7050 Int2 lo = x86::cvtps2pi(val);
7051 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
7053 return Int4(lo, hi);
// Scalar SSE wrappers. Each inserts val as element 0 of an otherwise
// undefined Float4, calls the intrinsic on that vector and returns element 0
// of the result.

// x86_sse_rcp_ss
7057 RValue<Float> rcpss(RValue<Float> val)
7059 llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss);
7061 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
7063 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), 0));
// x86_sse_sqrt_ss
7066 RValue<Float> sqrtss(RValue<Float> val)
7068 llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss);
7070 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
7072 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), 0));
// x86_sse_rsqrt_ss
7075 RValue<Float> rsqrtss(RValue<Float> val)
7077 llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss);
7079 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
7081 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), 0));
7084 RValue<Float4> rcpps(RValue<Float4> val)
7086 llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps);
7088 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
7091 RValue<Float4> sqrtps(RValue<Float4> val)
7093 llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps);
7095 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
7098 RValue<Float4> rsqrtps(RValue<Float4> val)
7100 llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps);
7102 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
7105 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
7107 llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps);
7109 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
7112 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
7114 llvm::Function *minps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
7116 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
7119 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
7121 llvm::Function *roundss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);
7123 Value *undef = V(UndefValue::get(Float4::getType()));
7124 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
7126 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), 0));
7129 RValue<Float> floorss(RValue<Float> val)
7131 return roundss(val, 1);
7134 RValue<Float> ceilss(RValue<Float> val)
7136 return roundss(val, 2);
7139 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
7141 llvm::Function *roundps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);
7143 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
7146 RValue<Float4> floorps(RValue<Float4> val)
7148 return roundps(val, 1);
7151 RValue<Float4> ceilps(RValue<Float4> val)
7153 return roundps(val, 2);
7156 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
7158 llvm::Function *cmpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);
7160 return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
7163 RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
7165 return cmpps(x, y, 0);
7168 RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
7170 return cmpps(x, y, 1);
7173 RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
7175 return cmpps(x, y, 2);
7178 RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
7180 return cmpps(x, y, 3);
7183 RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
7185 return cmpps(x, y, 4);
7188 RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
7190 return cmpps(x, y, 5);
7193 RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
7195 return cmpps(x, y, 6);
7198 RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
7200 return cmpps(x, y, 7);
7203 RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
7205 llvm::Function *cmpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);
7207 Value *vector1 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
7208 Value *vector2 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);
7210 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), 0));
7213 RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
7215 return cmpss(x, y, 0);
7218 RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
7220 return cmpss(x, y, 1);
7223 RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
7225 return cmpss(x, y, 2);
7228 RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
7230 return cmpss(x, y, 3);
7233 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
7235 return cmpss(x, y, 4);
7238 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
7240 return cmpss(x, y, 5);
7243 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
7245 return cmpss(x, y, 6);
7248 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
7250 return cmpss(x, y, 7);
7253 RValue<Int4> pabsd(RValue<Int4> x)
7255 llvm::Function *pabsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
7257 return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
7260 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
7262 llvm::Function *paddsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
7264 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value))));
7267 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7269 llvm::Function *psubsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
7271 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value))));
7274 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7276 llvm::Function *paddusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
7278 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value))));
7281 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7283 llvm::Function *psubusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
7285 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value))));
7288 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7290 llvm::Function *paddsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
7292 return As<SByte8>(RValue<MMX>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value))));
7295 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7297 llvm::Function *psubsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
7299 return As<SByte8>(RValue<MMX>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value))));
7302 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7304 llvm::Function *paddusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
7306 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value))));
7309 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7311 llvm::Function *psubusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
7313 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value))));
7316 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
7318 llvm::Function *paddw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
7320 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value))));
7323 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
7325 llvm::Function *psubw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
7327 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value))));
7330 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
7332 llvm::Function *pmullw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
7334 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value))));
7337 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
7339 llvm::Function *pand = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
7341 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value))));
7344 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
7346 llvm::Function *por = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
7348 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value))));
7351 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
7353 llvm::Function *pxor = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
7355 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value))));
7358 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
7360 llvm::Function *pshufw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
7362 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y))))));
7365 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
7367 llvm::Function *punpcklwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
7369 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value))));
7372 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
7374 llvm::Function *punpckhwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
7376 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value))));
7379 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
7381 llvm::Function *pinsrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
7383 return As<Short4>(RValue<MMX>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i))))));
7386 RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
7388 llvm::Function *pextrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
7390 return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
7393 RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
7395 llvm::Function *punpckldq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
7397 return As<Long1>(RValue<MMX>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value))));
7400 RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
7402 llvm::Function *punpckhdq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
7404 return As<Long1>(RValue<MMX>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value))));
7407 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
7409 llvm::Function *punpcklbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
7411 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value))));
7414 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
7416 llvm::Function *punpckhbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
7418 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value))));
7421 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
7423 llvm::Function *paddb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
7425 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value))));
7428 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
7430 llvm::Function *psubb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
7432 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value))));
7435 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
7437 llvm::Function *paddd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
7439 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value))));
7442 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
7444 llvm::Function *psubd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
7446 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value))));
7449 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7451 llvm::Function *pavgw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
7453 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value))));
7456 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7458 llvm::Function *pmaxsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
7460 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value))));
7463 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7465 llvm::Function *pminsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
7467 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value))));
7470 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7472 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
7474 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value))));
7477 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7479 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
7481 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value))));
7484 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7486 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
7488 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value))));
7491 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7493 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
7495 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value))));
7498 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7500 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
7502 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value))));
7505 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7507 if(CPUID::supportsSSE2())
7509 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
7511 return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
7516 Int2 hiX = Int2(Swizzle(x, 0xEE));
7519 Int2 hiY = Int2(Swizzle(y, 0xEE));
7521 Short4 lo = x86::packssdw(loX, hiX);
7522 Short4 hi = x86::packssdw(loY, hiY);
7524 return Short8(lo, hi);
7528 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7530 llvm::Function *packsswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
7532 return As<SByte8>(RValue<MMX>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value))));
7535 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
7537 llvm::Function *packuswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
7539 return As<Byte8>(RValue<MMX>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value))));
7542 RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
7544 if(CPUID::supportsSSE4_1())
7546 llvm::Function *packusdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
7548 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
7552 // FIXME: Not an exact replacement!
7553 return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
7557 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7559 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
7561 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7564 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7566 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
7568 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
7571 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7573 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
7575 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7578 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7580 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
7582 return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
7585 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7587 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
7589 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7592 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7594 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
7596 return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
7599 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7601 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d);
7603 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7606 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7608 if(CPUID::supportsSSE2())
7610 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d);
7612 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
7617 Int2 hi = Int2(Swizzle(x, 0xEE));
7619 lo = x86::pslld(lo, y);
7620 hi = x86::pslld(hi, y);
7622 return Int4(lo, hi);
7626 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7628 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d);
7630 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7633 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7635 if(CPUID::supportsSSE2())
7637 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d);
7639 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
7644 Int2 hi = Int2(Swizzle(x, 0xEE));
7646 lo = x86::psrad(lo, y);
7647 hi = x86::psrad(hi, y);
7649 return Int4(lo, hi);
7653 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7655 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d);
7657 return As<UInt2>(RValue<MMX>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y))))));
7660 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7662 if(CPUID::supportsSSE2())
7664 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d);
7666 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
7670 UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7671 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7673 lo = x86::psrld(lo, y);
7674 hi = x86::psrld(hi, y);
7676 return UInt4(lo, hi);
7680 RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y)
7682 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_w);
7684 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, As<MMX>(y).value))));
7687 RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y)
7689 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_w);
7691 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, As<MMX>(y).value))));
7694 RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y)
7696 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_w);
7698 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, As<MMX>(y).value))));
7701 RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y)
7703 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_d);
7705 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, As<MMX>(y).value))));
7708 RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y)
7710 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_d);
7712 return As<UInt2>(RValue<MMX>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, As<MMX>(y).value))));
7715 RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y)
7717 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_d);
7719 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, As<MMX>(y).value))));
7722 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7724 llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd);
7726 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
7729 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7731 llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd);
7733 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
7736 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7738 llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud);
7740 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
7743 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7745 llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud);
7747 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
7750 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7752 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w);
7754 return As<Short4>(RValue<MMX>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value))));
7757 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7759 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w);
7761 return As<UShort4>(RValue<MMX>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value))));
7764 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7766 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd);
7768 return As<Int2>(RValue<MMX>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value))));
7771 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7773 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w);
7775 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
7778 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7780 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w);
7782 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
7785 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7787 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd);
7789 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
7792 RValue<Int> movmskps(RValue<Float4> x)
7794 llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps);
7796 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
7799 RValue<Int> pmovmskb(RValue<Byte8> x)
7801 llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb);
7803 return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value)));
7806 //RValue<Int2> movd(RValue<Pointer<Int>> x)
7808 // Value *element = Nucleus::createLoad(x.value);
7810 //// Value *int2 = UndefValue::get(Int2::getType());
7811 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7813 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7815 // return RValue<Int2>(int2);
7818 //RValue<Int2> movdq2q(RValue<Int4> x)
7820 // Value *long2 = Nucleus::createBitCast(x.value, Long2::getType());
7821 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7823 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7826 RValue<Int4> pmovzxbd(RValue<Int4> x)
7828 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd);
7830 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))));
7833 RValue<Int4> pmovsxbd(RValue<Int4> x)
7835 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd);
7837 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))));
7840 RValue<Int4> pmovzxwd(RValue<Int4> x)
7842 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd);
7844 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))));
7847 RValue<Int4> pmovsxwd(RValue<Int4> x)
7849 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd);
7851 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))));
7856 llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms);
7858 V(::builder->CreateCall(emms));