1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "Nucleus.hpp"
17 #include "llvm/Support/IRBuilder.h"
18 #include "llvm/Function.h"
19 #include "llvm/GlobalVariable.h"
20 #include "llvm/Module.h"
21 #include "llvm/LLVMContext.h"
22 #include "llvm/Constants.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/PassManager.h"
25 #include "llvm/Analysis/LoopPass.h"
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/Target/TargetData.h"
28 #include "llvm/Target/TargetOptions.h"
29 #include "llvm/Support/TargetSelect.h"
30 #include "../lib/ExecutionEngine/JIT/JIT.h"
32 #include "LLVMRoutine.hpp"
33 #include "LLVMRoutineManager.hpp"
38 #include "MutexLock.hpp"
40 #include <xmmintrin.h>
43 #if defined(__x86_64__) && defined(_WIN32)
// Placeholder definition, compiled only when both __x86_64__ and _WIN32 are defined.
// It is not implemented: reaching it trips the assert.
44 extern "C" void X86CompilationCallback()
46 assert(false); // UNIMPLEMENTED
// Entry points of the CodeAnalyst JIT-notification library. They start out null and are
// resolved with GetProcAddress from CAJitNtfyLib.dll further down in this file.
52 bool (*CodeAnalystInitialize)() = 0;
53 void (*CodeAnalystCompleteJITLog)() = 0;
54 bool (*CodeAnalystLogJITCode)(const void *jitCodeStartAddr, unsigned int jitCodeSize, const wchar_t *functionName) = 0;
// Defined elsewhere; this file sets it to false during code-generator setup.
59 extern bool JITEmitDebugInfo;
// Shared code-generation state. The rest of this file reaches these through a leading `::`.
64 sw::LLVMRoutineManager *routineManager = nullptr;
65 llvm::ExecutionEngine *executionEngine = nullptr;
66 llvm::IRBuilder<> *builder = nullptr;
67 llvm::LLVMContext *context = nullptr;
68 llvm::Module *module = nullptr;
69 llvm::Function *function = nullptr;
// Held while code generation is in progress, because Reactor and LLVM are not thread safe.
71 sw::BackoffLock codegenMutex;
73 sw::BasicBlock *falseBB = nullptr;
// Pass schedule read by Nucleus::optimize(), which walks the array and stops at the first
// Disabled entry. Only InstructionCombining is scheduled here.
80 Optimization optimization[10] = {InstructionCombining, Disabled};
// Empty subclasses that give Reactor its own Type/Value/BasicBlock names for the
// corresponding LLVM classes. They add no members; the T()/V()/B() helpers below produce
// pointers to them by reinterpret_cast from the LLVM pointers.
82 class Type : public llvm::Type {};
83 class Value : public llvm::Value {};
84 class BasicBlock : public llvm::BasicBlock {};
// Reinterprets an LLVM type pointer as a Reactor Type pointer (pure cast, no conversion).
86 inline Type *T(llvm::Type *t)
88 return reinterpret_cast<Type*>(t);
// Reinterprets an LLVM value pointer as a Reactor Value pointer.
91 inline Value *V(llvm::Value *t)
93 return reinterpret_cast<Value*>(t);
// Views a vector of Reactor Type pointers as a vector of LLVM type pointers, by reference.
96 inline std::vector<llvm::Type*> &T(std::vector<Type*> &t)
98 return reinterpret_cast<std::vector<llvm::Type*>&>(t);
// Reinterprets an LLVM basic-block pointer as a Reactor BasicBlock pointer.
101 inline BasicBlock *B(llvm::BasicBlock *t)
103 return reinterpret_cast<BasicBlock*>(t);
108 ::codegenMutex.lock(); // Reactor and LLVM are currently not thread safe
110 InitializeNativeTarget();
111 JITEmitDebugInfo = false;
115 ::context = new LLVMContext();
118 ::module = new Module("", *::context);
119 ::routineManager = new LLVMRoutineManager();
121 #if defined(__x86_64__)
122 const char *architecture = "x86-64";
124 const char *architecture = "x86";
127 SmallVector<std::string, 1> MAttrs;
128 MAttrs.push_back(CPUID::supportsMMX() ? "+mmx" : "-mmx");
129 MAttrs.push_back(CPUID::supportsCMOV() ? "+cmov" : "-cmov");
130 MAttrs.push_back(CPUID::supportsSSE() ? "+sse" : "-sse");
131 MAttrs.push_back(CPUID::supportsSSE2() ? "+sse2" : "-sse2");
132 MAttrs.push_back(CPUID::supportsSSE3() ? "+sse3" : "-sse3");
133 MAttrs.push_back(CPUID::supportsSSSE3() ? "+ssse3" : "-ssse3");
134 MAttrs.push_back(CPUID::supportsSSE4_1() ? "+sse41" : "-sse41");
137 TargetMachine *targetMachine = EngineBuilder::selectTarget(::module, architecture, "", MAttrs, Reloc::Default, CodeModel::JITDefault, &error);
138 ::executionEngine = JIT::createJIT(::module, 0, ::routineManager, CodeGenOpt::Aggressive, true, targetMachine);
142 ::builder = new IRBuilder<>(*::context);
145 HMODULE CodeAnalyst = LoadLibrary("CAJitNtfyLib.dll");
148 CodeAnalystInitialize = (bool(*)())GetProcAddress(CodeAnalyst, "CAJIT_Initialize");
149 CodeAnalystCompleteJITLog = (void(*)())GetProcAddress(CodeAnalyst, "CAJIT_CompleteJITLog");
150 CodeAnalystLogJITCode = (bool(*)(const void*, unsigned int, const wchar_t*))GetProcAddress(CodeAnalyst, "CAJIT_LogJITCode");
152 CodeAnalystInitialize();
160 delete ::executionEngine;
161 ::executionEngine = nullptr;
163 ::routineManager = nullptr;
164 ::function = nullptr;
167 ::codegenMutex.unlock();
// Finishes the function under construction and obtains a routine for its compiled entry point.
// `name` is forwarded to the CodeAnalyst logger when that logger is available.
// NOTE(review): several statements of this function are not visible in this view (the use of
// `runOptimizations`, the conditions guarding the module dumps, and the return), so the
// comments below describe only the visible statements.
170 Routine *Nucleus::acquireRoutine(const wchar_t *name, bool runOptimizations)
// The current insert block has no terminator yet: the function fell off its end.
172 if(::builder->GetInsertBlock()->empty() || !::builder->GetInsertBlock()->back().isTerminator())
174 llvm::Type *type = ::function->getReturnType();
// Return an undefined value of the function's return type.
182 createRet(V(UndefValue::get(type)));
// Writes the module's IR to a text file.
189 raw_fd_ostream file("llvm-dump-unopt.txt", error);
190 ::module->print(file, 0);
// Writes the module's IR to a second text file.
201 raw_fd_ostream file("llvm-dump-opt.txt", error);
202 ::module->print(file, 0);
// Ask the execution engine for the function's native entry point, then hand it to the routine manager.
205 void *entry = ::executionEngine->getPointerToFunction(::function);
206 LLVMRoutine *routine = ::routineManager->acquireRoutine(entry);
// Report the generated code to CodeAnalyst only if its logging entry point was resolved.
208 if(CodeAnalystLogJITCode)
210 CodeAnalystLogJITCode(routine->getEntry(), routine->getCodeSize(), name);
// Runs the configured optimization passes over ::module.
// The pass manager lives in a function-local static pointer. The guard around its construction
// is not visible in this view (presumably it is built once and reused - confirm).
216 void Nucleus::optimize()
218 static PassManager *passManager = nullptr;
222 passManager = new PassManager();
225 // NoInfsFPMath = true;
226 // NoNaNsFPMath = true;
// Always-present passes: the execution engine's target data, then scalar replacement of aggregates.
228 passManager->add(new TargetData(*::executionEngine->getTargetData()));
229 passManager->add(createScalarReplAggregatesPass());
// Append the passes listed in `optimization`, in order, stopping at the first Disabled entry
// or after 10 entries.
231 for(int pass = 0; pass < 10 && optimization[pass] != Disabled; pass++)
233 switch(optimization[pass])
235 case Disabled: break;
236 case CFGSimplification: passManager->add(createCFGSimplificationPass()); break;
237 case LICM: passManager->add(createLICMPass()); break;
238 case AggressiveDCE: passManager->add(createAggressiveDCEPass()); break;
239 case GVN: passManager->add(createGVNPass()); break;
240 case InstructionCombining: passManager->add(createInstructionCombiningPass()); break;
241 case Reassociate: passManager->add(createReassociatePass()); break;
242 case DeadStoreElimination: passManager->add(createDeadStoreEliminationPass()); break;
243 case SCCP: passManager->add(createSCCPPass()); break;
244 case ScalarReplAggregates: passManager->add(createScalarReplAggregatesPass()); break;
251 passManager->run(*::module);
// Creates an alloca for a variable of `type` and returns it as a Value.
// Two forms are visible: one passing `arraySize` as the element count and one passing a null
// count. The condition choosing between them is not visible in this view.
254 Value *Nucleus::allocateStackVariable(Type *type, int arraySize)
256 // Need to allocate it in the entry block for mem2reg to work
257 llvm::BasicBlock &entryBlock = ::function->getEntryBlock();
259 Instruction *declaration;
263 declaration = new AllocaInst(type, Nucleus::createConstantInt(arraySize));
267 declaration = new AllocaInst(type, (Value*)0);
// The alloca goes to the front of the entry block, not to the builder's current insert point.
270 entryBlock.getInstList().push_front(declaration);
272 return V(declaration);
// Creates a new unnamed basic block with ::function as its parent.
275 BasicBlock *Nucleus::createBasicBlock()
277 return B(BasicBlock::Create(*::context, "", ::function));
// Returns the block the global IRBuilder is currently inserting into.
280 BasicBlock *Nucleus::getInsertBlock()
282 return B(::builder->GetInsertBlock());
// Points the global IRBuilder at `basicBlock`. The check that the previous block ends in a
// terminator is commented out, so it is not enforced here.
285 void Nucleus::setInsertBlock(BasicBlock *basicBlock)
287 // assert(::builder->GetInsertBlock()->back().isTerminator());
288 return ::builder->SetInsertPoint(basicBlock);
// Starts a new function in ::module and makes it the current one (::function).
// ReturnType/Params give its signature; the `false` passed to FunctionType::get is the
// variadic flag. The function is unnamed, has internal linkage and the C calling convention.
291 void Nucleus::createFunction(Type *ReturnType, std::vector<Type*> &Params)
293 llvm::FunctionType *functionType = llvm::FunctionType::get(ReturnType, T(Params), false);
294 ::function = llvm::Function::Create(functionType, llvm::GlobalValue::InternalLinkage, "", ::module);
295 ::function->setCallingConv(llvm::CallingConv::C);
// Create the function's first block and direct the builder into it.
297 ::builder->SetInsertPoint(BasicBlock::Create(*::context, "", ::function));
// Looks up an argument of the current function.
// NOTE(review): only the iterator initialisation is visible in this view; the statements that
// use `index` and return the argument are missing.
300 Value *Nucleus::getArgument(unsigned int index)
302 llvm::Function::arg_iterator args = ::function->arg_begin();
// Block terminators, all emitted at the global IRBuilder's current insert point.
// NOTE(review): the embedded line numbering suggests createRetVoid and createRet each have
// a statement before the builder call that is not visible in this view.

// Emits `ret void`.
313 void Nucleus::createRetVoid()
317 ::builder->CreateRetVoid();
// Emits a return of `v`.
320 void Nucleus::createRet(Value *v)
324 ::builder->CreateRet(v);
// Emits an unconditional branch to `dest`.
327 void Nucleus::createBr(BasicBlock *dest)
329 ::builder->CreateBr(dest);
// Emits a conditional branch: to `ifTrue` when `cond` holds, otherwise to `ifFalse`.
332 void Nucleus::createCondBr(Value *cond, BasicBlock *ifTrue, BasicBlock *ifFalse)
334 ::builder->CreateCondBr(cond, ifTrue, ifFalse);
// Binary arithmetic emitters. Each forwards lhs/rhs to the same-named IRBuilder method at the
// current insert point and returns the result wrapped as a Reactor Value.
// The U/S prefixes select unsigned/signed integer forms; the F prefix selects floating point.

// Integer add.
337 Value *Nucleus::createAdd(Value *lhs, Value *rhs)
339 return V(::builder->CreateAdd(lhs, rhs));
// Integer subtract.
342 Value *Nucleus::createSub(Value *lhs, Value *rhs)
344 return V(::builder->CreateSub(lhs, rhs));
// Integer multiply.
347 Value *Nucleus::createMul(Value *lhs, Value *rhs)
349 return V(::builder->CreateMul(lhs, rhs));
// Unsigned integer divide.
352 Value *Nucleus::createUDiv(Value *lhs, Value *rhs)
354 return V(::builder->CreateUDiv(lhs, rhs));
// Signed integer divide.
357 Value *Nucleus::createSDiv(Value *lhs, Value *rhs)
359 return V(::builder->CreateSDiv(lhs, rhs));
// Floating-point add.
362 Value *Nucleus::createFAdd(Value *lhs, Value *rhs)
364 return V(::builder->CreateFAdd(lhs, rhs));
// Floating-point subtract.
367 Value *Nucleus::createFSub(Value *lhs, Value *rhs)
369 return V(::builder->CreateFSub(lhs, rhs));
// Floating-point multiply.
372 Value *Nucleus::createFMul(Value *lhs, Value *rhs)
374 return V(::builder->CreateFMul(lhs, rhs));
// Floating-point divide.
377 Value *Nucleus::createFDiv(Value *lhs, Value *rhs)
379 return V(::builder->CreateFDiv(lhs, rhs));
// Unsigned integer remainder.
382 Value *Nucleus::createURem(Value *lhs, Value *rhs)
384 return V(::builder->CreateURem(lhs, rhs));
// Signed integer remainder.
387 Value *Nucleus::createSRem(Value *lhs, Value *rhs)
389 return V(::builder->CreateSRem(lhs, rhs));
// Floating-point remainder.
392 Value *Nucleus::createFRem(Value *lhs, Value *rhs)
394 return V(::builder->CreateFRem(lhs, rhs));
// Shift, bitwise and unary emitters. Each forwards to the same-named IRBuilder method and
// returns the result wrapped as a Reactor Value.

// Shift left.
397 Value *Nucleus::createShl(Value *lhs, Value *rhs)
399 return V(::builder->CreateShl(lhs, rhs));
// Logical shift right.
402 Value *Nucleus::createLShr(Value *lhs, Value *rhs)
404 return V(::builder->CreateLShr(lhs, rhs));
// Arithmetic shift right.
407 Value *Nucleus::createAShr(Value *lhs, Value *rhs)
409 return V(::builder->CreateAShr(lhs, rhs));
// Bitwise AND.
412 Value *Nucleus::createAnd(Value *lhs, Value *rhs)
414 return V(::builder->CreateAnd(lhs, rhs));
// Bitwise OR.
417 Value *Nucleus::createOr(Value *lhs, Value *rhs)
419 return V(::builder->CreateOr(lhs, rhs));
// Bitwise XOR.
422 Value *Nucleus::createXor(Value *lhs, Value *rhs)
424 return V(::builder->CreateXor(lhs, rhs));
// Integer negation.
427 Value *Nucleus::createNeg(Value *v)
429 return V(::builder->CreateNeg(v));
// Floating-point negation.
432 Value *Nucleus::createFNeg(Value *v)
434 return V(::builder->CreateFNeg(v));
// Bitwise NOT.
437 Value *Nucleus::createNot(Value *v)
439 return V(::builder->CreateNot(v));
// Memory access emitters. `type` is used only for the assert, which checks that it equals the
// first contained type of ptr's type; the instruction itself is built from `ptr` alone.

// Emits a load from `ptr` with the given volatility and alignment and returns the loaded value.
442 Value *Nucleus::createLoad(Value *ptr, Type *type, bool isVolatile, unsigned int align)
444 assert(ptr->getType()->getContainedType(0) == type);
445 return V(::builder->Insert(new LoadInst(ptr, "", isVolatile, align)));
// Emits a store of `value` to `ptr` with the given volatility and alignment.
// NOTE(review): the return statement is not visible in this view.
448 Value *Nucleus::createStore(Value *value, Value *ptr, Type *type, bool isVolatile, unsigned int align)
450 assert(ptr->getType()->getContainedType(0) == type);
451 ::builder->Insert(new StoreInst(value, ptr, isVolatile, align));
// Emits a getelementptr on `ptr` with the single index `index` and returns the resulting pointer.
455 Value *Nucleus::createGEP(Value *ptr, Type *type, Value *index)
457 assert(ptr->getType()->getContainedType(0) == type);
458 return V(::builder->CreateGEP(ptr, index));
// Emits an atomic read-modify-write add of `value` to the location at `ptr`, using
// sequentially consistent ordering, and returns the instruction's result.
461 Value *Nucleus::createAtomicAdd(Value *ptr, Value *value)
463 return V(::builder->CreateAtomicRMW(AtomicRMWInst::Add, ptr, value, SequentiallyConsistent));
// Conversion emitters. Each converts `v` to `destType` through the same-named IRBuilder
// method and returns the converted value.

// Integer truncation.
466 Value *Nucleus::createTrunc(Value *v, Type *destType)
468 return V(::builder->CreateTrunc(v, destType));
// Zero extension.
471 Value *Nucleus::createZExt(Value *v, Type *destType)
473 return V(::builder->CreateZExt(v, destType));
// Sign extension.
476 Value *Nucleus::createSExt(Value *v, Type *destType)
478 return V(::builder->CreateSExt(v, destType));
// Floating point to signed integer.
481 Value *Nucleus::createFPToSI(Value *v, Type *destType)
483 return V(::builder->CreateFPToSI(v, destType));
// Unsigned integer to floating point.
486 Value *Nucleus::createUIToFP(Value *v, Type *destType)
488 return V(::builder->CreateUIToFP(v, destType));
// Signed integer to floating point.
491 Value *Nucleus::createSIToFP(Value *v, Type *destType)
493 return V(::builder->CreateSIToFP(v, destType));
// Floating-point truncation.
496 Value *Nucleus::createFPTrunc(Value *v, Type *destType)
498 return V(::builder->CreateFPTrunc(v, destType));
// Floating-point extension.
501 Value *Nucleus::createFPExt(Value *v, Type *destType)
503 return V(::builder->CreateFPExt(v, destType));
// Bit cast.
506 Value *Nucleus::createBitCast(Value *v, Type *destType)
508 return V(::builder->CreateBitCast(v, destType));
// Integer comparison emitters. Each forwards lhs/rhs to the same-named IRBuilder method and
// returns the comparison result. U = unsigned ordering, S = signed ordering.

// Equal.
511 Value *Nucleus::createICmpEQ(Value *lhs, Value *rhs)
513 return V(::builder->CreateICmpEQ(lhs, rhs));
// Not equal.
516 Value *Nucleus::createICmpNE(Value *lhs, Value *rhs)
518 return V(::builder->CreateICmpNE(lhs, rhs));
// Unsigned greater than.
521 Value *Nucleus::createICmpUGT(Value *lhs, Value *rhs)
523 return V(::builder->CreateICmpUGT(lhs, rhs));
// Unsigned greater than or equal.
526 Value *Nucleus::createICmpUGE(Value *lhs, Value *rhs)
528 return V(::builder->CreateICmpUGE(lhs, rhs));
// Unsigned less than.
531 Value *Nucleus::createICmpULT(Value *lhs, Value *rhs)
533 return V(::builder->CreateICmpULT(lhs, rhs));
// Unsigned less than or equal.
536 Value *Nucleus::createICmpULE(Value *lhs, Value *rhs)
538 return V(::builder->CreateICmpULE(lhs, rhs));
// Signed greater than.
541 Value *Nucleus::createICmpSGT(Value *lhs, Value *rhs)
543 return V(::builder->CreateICmpSGT(lhs, rhs));
// Signed greater than or equal.
546 Value *Nucleus::createICmpSGE(Value *lhs, Value *rhs)
548 return V(::builder->CreateICmpSGE(lhs, rhs));
// Signed less than.
551 Value *Nucleus::createICmpSLT(Value *lhs, Value *rhs)
553 return V(::builder->CreateICmpSLT(lhs, rhs));
// Signed less than or equal.
556 Value *Nucleus::createICmpSLE(Value *lhs, Value *rhs)
558 return V(::builder->CreateICmpSLE(lhs, rhs));
// Floating-point comparison emitters. Each forwards lhs/rhs to the same-named IRBuilder method
// and returns the comparison result. The predicate is the suffix of the function name:
// O* are the ordered predicates, U* the unordered ones, ORD/UNO test orderedness itself.

561 Value *Nucleus::createFCmpOEQ(Value *lhs, Value *rhs)
563 return V(::builder->CreateFCmpOEQ(lhs, rhs));
566 Value *Nucleus::createFCmpOGT(Value *lhs, Value *rhs)
568 return V(::builder->CreateFCmpOGT(lhs, rhs));
571 Value *Nucleus::createFCmpOGE(Value *lhs, Value *rhs)
573 return V(::builder->CreateFCmpOGE(lhs, rhs));
576 Value *Nucleus::createFCmpOLT(Value *lhs, Value *rhs)
578 return V(::builder->CreateFCmpOLT(lhs, rhs));
581 Value *Nucleus::createFCmpOLE(Value *lhs, Value *rhs)
583 return V(::builder->CreateFCmpOLE(lhs, rhs));
586 Value *Nucleus::createFCmpONE(Value *lhs, Value *rhs)
588 return V(::builder->CreateFCmpONE(lhs, rhs));
591 Value *Nucleus::createFCmpORD(Value *lhs, Value *rhs)
593 return V(::builder->CreateFCmpORD(lhs, rhs));
596 Value *Nucleus::createFCmpUNO(Value *lhs, Value *rhs)
598 return V(::builder->CreateFCmpUNO(lhs, rhs));
601 Value *Nucleus::createFCmpUEQ(Value *lhs, Value *rhs)
603 return V(::builder->CreateFCmpUEQ(lhs, rhs));
606 Value *Nucleus::createFCmpUGT(Value *lhs, Value *rhs)
608 return V(::builder->CreateFCmpUGT(lhs, rhs));
611 Value *Nucleus::createFCmpUGE(Value *lhs, Value *rhs)
613 return V(::builder->CreateFCmpUGE(lhs, rhs));
616 Value *Nucleus::createFCmpULT(Value *lhs, Value *rhs)
618 return V(::builder->CreateFCmpULT(lhs, rhs));
621 Value *Nucleus::createFCmpULE(Value *lhs, Value *rhs)
623 return V(::builder->CreateFCmpULE(lhs, rhs));
// Emits the UNE ("unordered or not equal") floating-point comparison of lhs and rhs at the
// current insert point and returns the comparison result.
// This must call CreateFCmpUNE: the ULE builder would emit "unordered or less-or-equal",
// which gives the wrong answer whenever lhs > rhs or lhs == rhs.
626 Value *Nucleus::createFCmpUNE(Value *lhs, Value *rhs)
628 return V(::builder->CreateFCmpUNE(lhs, rhs));
// Emits an extractelement reading lane `index` (passed as a 32-bit constant) of `vector`.
// `type` is used only by the assert, which checks it equals the vector's first contained type.
631 Value *Nucleus::createExtractElement(Value *vector, Type *type, int index)
633 assert(vector->getType()->getContainedType(0) == type);
634 return V(::builder->CreateExtractElement(vector, createConstantInt(index)));
// Emits an insertelement writing `element` into lane `index` of `vector`; returns the new vector value.
637 Value *Nucleus::createInsertElement(Value *vector, Value *element, int index)
639 return V(::builder->CreateInsertElement(vector, element, createConstantInt(index)));
// Emits a shufflevector of V1 and V2.
// `select` must hold one lane index per element of V1; the element count is read from V1's
// vector type. At most 16 lanes fit in the local buffer, which is enforced only by the assert.
642 Value *Nucleus::createShuffleVector(Value *V1, Value *V2, const int *select)
644 int size = llvm::cast<llvm::VectorType>(V1->getType())->getNumElements();
645 const int maxSize = 16;
646 llvm::Constant *swizzle[maxSize];
647 assert(size <= maxSize);
// Turn each selector into a 32-bit integer constant.
649 for(int i = 0; i < size; i++)
651 swizzle[i] = llvm::ConstantInt::get(Type::getInt32Ty(*::context), select[i]);
// Pack the first `size` constants into the constant vector used as the shuffle mask.
654 llvm::Value *shuffle = llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(swizzle, size));
656 return V(::builder->CreateShuffleVector(V1, V2, shuffle));
// Emits a select: the result is `ifTrue` where `C` holds and `ifFalse` otherwise.
659 Value *Nucleus::createSelect(Value *C, Value *ifTrue, Value *ifFalse)
661 return V(::builder->CreateSelect(C, ifTrue, ifFalse));
// Emits a switch on `v` with default destination `Dest`; `NumCases` is passed through to the
// builder. The returned Value is the switch instruction, to be passed to addSwitchCase.
664 Value *Nucleus::createSwitch(Value *v, BasicBlock *Dest, unsigned NumCases)
666 return V(::builder->CreateSwitch(v, Dest, NumCases));
// Adds the case label `Case` (built as a signed 32-bit constant) with target `Branch`.
// `Switch` is reinterpret_cast to SwitchInst with no checked cast, so it must really be a
// switch instruction.
669 void Nucleus::addSwitchCase(Value *Switch, int Case, BasicBlock *Branch)
671 reinterpret_cast<SwitchInst*>(Switch)->addCase(llvm::ConstantInt::get(Type::getInt32Ty(*::context), Case, true), Branch);
// Emits an `unreachable` terminator.
674 void Nucleus::createUnreachable()
676 ::builder->CreateUnreachable();
// Shuffles `val` with itself using four lane indices packed into `select`, two bits per lane,
// lowest bits first.
// NOTE(review): the declaration that receives the four entries below (the `swizzle` array
// passed to createShuffleVector) is not visible in this view.
679 static Value *createSwizzle4(Value *val, unsigned char select)
683 (select >> 0) & 0x03,
684 (select >> 2) & 0x03,
685 (select >> 4) & 0x03,
686 (select >> 6) & 0x03,
689 return Nucleus::createShuffleVector(val, val, swizzle);
// Combines lanes of `lhs` and `rhs` with a shuffle driven by `select`.
// `select` packs four 2-bit lane indices; every lane index named by any of the four fields is
// flagged in `mask`.
// NOTE(review): the construction of `swizzle` from `mask` and the function's return are not
// visible in this view.
692 static Value *createMask4(Value *lhs, Value *rhs, unsigned char select)
694 bool mask[4] = {false, false, false, false};
696 mask[(select >> 0) & 0x03] = true;
697 mask[(select >> 2) & 0x03] = true;
698 mask[(select >> 4) & 0x03] = true;
699 mask[(select >> 6) & 0x03] = true;
709 Value *shuffle = Nucleus::createShuffleVector(lhs, rhs, swizzle);
// Produces a global of type `Ty` that is mapped to the host address `address`.
// NOTE(review): the condition guarding the early return and the final return statement are
// not visible in this view.
714 Value *Nucleus::createConstantPointer(const void *address, Type *Ty, unsigned int align)
// Ask the execution engine whether a global is already mapped to this address.
716 const GlobalValue *existingGlobal = ::executionEngine->getGlobalValueAtAddress(const_cast<void*>(address)); // FIXME: Const
// Hands back the existing global (C-style cast, which also drops its const qualifier).
720 return (Value*)existingGlobal;
// Create an unnamed, externally linked global without an initializer in ::module
// (the `true` argument is presumably the is-constant flag - confirm against the LLVM headers).
723 llvm::GlobalValue *global = new llvm::GlobalVariable(*::module, Ty, true, llvm::GlobalValue::ExternalLinkage, 0, "");
724 global->setAlignment(align);
// Bind the new global to the caller's address so generated code resolves it there.
726 ::executionEngine->addGlobalMapping(global, const_cast<void*>(address));
// Returns the pointer-to-ElementType type in address space 0.
731 Type *Nucleus::getPointerType(Type *ElementType)
733 return T(llvm::PointerType::get(ElementType, 0));
// Returns the null (all-zero) constant of type `Ty`.
736 Value *Nucleus::createNullValue(Type *Ty)
738 return V(llvm::Constant::getNullValue(Ty));
// Scalar constant creators. Each returns an LLVM constant of the matching width; the last
// argument of ConstantInt::get is the signedness flag and follows the C++ parameter type.

// 64-bit signed integer constant.
741 Value *Nucleus::createConstantLong(int64_t i)
743 return V(llvm::ConstantInt::get(Type::getInt64Ty(*::context), i, true));
// 32-bit signed integer constant.
746 Value *Nucleus::createConstantInt(int i)
748 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, true));
// 32-bit unsigned integer constant.
751 Value *Nucleus::createConstantInt(unsigned int i)
753 return V(llvm::ConstantInt::get(Type::getInt32Ty(*::context), i, false));
// 1-bit boolean constant.
756 Value *Nucleus::createConstantBool(bool b)
758 return V(llvm::ConstantInt::get(Type::getInt1Ty(*::context), b));
// 8-bit signed integer constant.
761 Value *Nucleus::createConstantByte(signed char i)
763 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, true));
// 8-bit unsigned integer constant.
766 Value *Nucleus::createConstantByte(unsigned char i)
768 return V(llvm::ConstantInt::get(Type::getInt8Ty(*::context), i, false));
// 16-bit signed integer constant.
771 Value *Nucleus::createConstantShort(short i)
773 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, true));
// 16-bit unsigned integer constant.
776 Value *Nucleus::createConstantShort(unsigned short i)
778 return V(llvm::ConstantInt::get(Type::getInt16Ty(*::context), i, false));
// Floating-point constant of the type returned by Float::getType().
781 Value *Nucleus::createConstantFloat(float x)
783 return V(llvm::ConstantFP::get(Float::getType(), x));
// Null pointer constant of type pointer-to-Ty (address space 0).
786 Value *Nucleus::createNullPointer(Type *Ty)
788 return V(llvm::ConstantPointerNull::get(llvm::PointerType::get(Ty, 0)));
// Builds a constant integer vector of `type` from `constants`.
// `type` must be a vector type; one entry of `constants` is read per vector element, so the
// array must hold at least that many values. At most 16 elements are supported (assert only).
791 Value *Nucleus::createConstantVector(const int64_t *constants, Type *type)
793 assert(llvm::isa<VectorType>(type));
794 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
795 assert(numConstants <= 16);
796 llvm::Constant *constantVector[16];
// Each element is created with the vector's element type (its first contained type).
798 for(int i = 0; i < numConstants; i++)
800 constantVector[i] = llvm::ConstantInt::get(type->getContainedType(0), constants[i]);
803 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
// Builds a constant floating-point vector of `type` from `constants`.
// `type` must be a vector type; one entry of `constants` is read per vector element. This
// overload supports at most 8 elements (assert only), unlike the 16 of the integer overload.
806 Value *Nucleus::createConstantVector(const double *constants, Type *type)
808 assert(llvm::isa<VectorType>(type));
809 const int numConstants = llvm::cast<VectorType>(type)->getNumElements();
810 assert(numConstants <= 8);
811 llvm::Constant *constantVector[8];
// Each element is created with the vector's element type (its first contained type).
813 for(int i = 0; i < numConstants; i++)
815 constantVector[i] = llvm::ConstantFP::get(type->getContainedType(0), constants[i]);
818 return V(llvm::ConstantVector::get(llvm::ArrayRef<llvm::Constant*>(constantVector, numConstants)));
// Returns the LLVM void type for the current context.
821 Type *Void::getType()
823 return T(llvm::Type::getVoidTy(*::context));
// Reactor variable class whose only visible member is a static getType().
826 class MMX : public Variable<MMX>
829 static Type *getType();
// Returns the LLVM x86 MMX type for the current context.
// NOTE(review): the signature owning this return is not visible in this view; presumably it
// is MMX::getType() - confirm.
834 return T(llvm::Type::getX86_MMXTy(*::context));
// Bool constructors and assignment operators. Each one stores a value into this variable
// through storeValue(); the assignment operators are const-qualified member functions.
// NOTE(review): some statements (stores after loadValue(), returns) and one constructor
// signature are not visible in this view.

// Initialises from a function argument's value.
837 Bool::Bool(Argument<Bool> argument)
839 storeValue(argument.value);
// Stores a 1-bit constant built from `x` (owning signature not visible here).
848 storeValue(Nucleus::createConstantBool(x));
// Initialises from an rvalue.
851 Bool::Bool(RValue<Bool> rhs)
853 storeValue(rhs.value);
// Copy construction: reads the current value of rhs.
856 Bool::Bool(const Bool &rhs)
858 Value *value = rhs.loadValue();
// Construction from a reference: reads the referenced value.
862 Bool::Bool(const Reference<Bool> &rhs)
864 Value *value = rhs.loadValue();
// Assigns an rvalue.
868 RValue<Bool> Bool::operator=(RValue<Bool> rhs) const
870 storeValue(rhs.value);
// Assigns from another Bool; the loaded value is also the expression's result.
875 RValue<Bool> Bool::operator=(const Bool &rhs) const
877 Value *value = rhs.loadValue();
880 return RValue<Bool>(value);
// Assigns from a reference; the loaded value is also the expression's result.
883 RValue<Bool> Bool::operator=(const Reference<Bool> &rhs) const
885 Value *value = rhs.loadValue();
888 return RValue<Bool>(value);
// Logical NOT, emitted as a bitwise Not on the 1-bit value.
891 RValue<Bool> operator!(RValue<Bool> val)
893 return RValue<Bool>(Nucleus::createNot(val.value));
// Logical AND, emitted as a single And instruction. Both operands are already-computed values,
// so there is no short-circuit evaluation in the generated code.
896 RValue<Bool> operator&&(RValue<Bool> lhs, RValue<Bool> rhs)
898 return RValue<Bool>(Nucleus::createAnd(lhs.value, rhs.value));
// Logical OR, emitted as a single Or instruction (likewise no short-circuit).
901 RValue<Bool> operator||(RValue<Bool> lhs, RValue<Bool> rhs)
903 return RValue<Bool>(Nucleus::createOr(lhs.value, rhs.value));
// Bool is represented by LLVM's 1-bit integer type.
906 Type *Bool::getType()
908 return T(llvm::Type::getInt1Ty(*::context));
// Byte constructors and assignment operators.
// NOTE(review): some statements (stores after createTrunc()/loadValue(), returns) and one
// constructor signature are not visible in this view.

// Initialises from a function argument's value.
911 Byte::Byte(Argument<Byte> argument)
913 storeValue(argument.value);
// Narrowing conversions: the wider integer is truncated to Byte's type.
916 Byte::Byte(RValue<Int> cast)
918 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
923 Byte::Byte(RValue<UInt> cast)
925 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
930 Byte::Byte(RValue<UShort> cast)
932 Value *integer = Nucleus::createTrunc(cast.value, Byte::getType());
// Stores `x` converted to unsigned char as an 8-bit constant (owning signature not visible here).
943 storeValue(Nucleus::createConstantByte((unsigned char)x));
// Initialises from a host unsigned char constant.
946 Byte::Byte(unsigned char x)
948 storeValue(Nucleus::createConstantByte(x));
// Initialises from an rvalue.
951 Byte::Byte(RValue<Byte> rhs)
953 storeValue(rhs.value);
// Copy construction: reads the current value of rhs.
956 Byte::Byte(const Byte &rhs)
958 Value *value = rhs.loadValue();
// Construction from a reference: reads the referenced value.
962 Byte::Byte(const Reference<Byte> &rhs)
964 Value *value = rhs.loadValue();
// Assigns an rvalue.
968 RValue<Byte> Byte::operator=(RValue<Byte> rhs) const
970 storeValue(rhs.value);
// Assigns from another Byte; the loaded value is also the expression's result.
975 RValue<Byte> Byte::operator=(const Byte &rhs) const
977 Value *value = rhs.loadValue();
980 return RValue<Byte>(value);
// Assigns from a reference; the loaded value is also the expression's result.
983 RValue<Byte> Byte::operator=(const Reference<Byte> &rhs) const
985 Value *value = rhs.loadValue();
988 return RValue<Byte>(value);
// Byte binary operators. Byte uses the unsigned instruction variants: division is UDiv,
// remainder is URem and right shift is the logical LShr.
991 RValue<Byte> operator+(RValue<Byte> lhs, RValue<Byte> rhs)
993 return RValue<Byte>(Nucleus::createAdd(lhs.value, rhs.value));
996 RValue<Byte> operator-(RValue<Byte> lhs, RValue<Byte> rhs)
998 return RValue<Byte>(Nucleus::createSub(lhs.value, rhs.value));
1001 RValue<Byte> operator*(RValue<Byte> lhs, RValue<Byte> rhs)
1003 return RValue<Byte>(Nucleus::createMul(lhs.value, rhs.value));
1006 RValue<Byte> operator/(RValue<Byte> lhs, RValue<Byte> rhs)
1008 return RValue<Byte>(Nucleus::createUDiv(lhs.value, rhs.value));
1011 RValue<Byte> operator%(RValue<Byte> lhs, RValue<Byte> rhs)
1013 return RValue<Byte>(Nucleus::createURem(lhs.value, rhs.value));
1016 RValue<Byte> operator&(RValue<Byte> lhs, RValue<Byte> rhs)
1018 return RValue<Byte>(Nucleus::createAnd(lhs.value, rhs.value));
1021 RValue<Byte> operator|(RValue<Byte> lhs, RValue<Byte> rhs)
1023 return RValue<Byte>(Nucleus::createOr(lhs.value, rhs.value));
1026 RValue<Byte> operator^(RValue<Byte> lhs, RValue<Byte> rhs)
1028 return RValue<Byte>(Nucleus::createXor(lhs.value, rhs.value));
1031 RValue<Byte> operator<<(RValue<Byte> lhs, RValue<Byte> rhs)
1033 return RValue<Byte>(Nucleus::createShl(lhs.value, rhs.value));
1036 RValue<Byte> operator>>(RValue<Byte> lhs, RValue<Byte> rhs)
1038 return RValue<Byte>(Nucleus::createLShr(lhs.value, rhs.value));
// Byte compound assignments. Each evaluates the matching binary operator on lhs and rhs and
// assigns the result back to lhs, returning the assignment's result.
1041 RValue<Byte> operator+=(const Byte &lhs, RValue<Byte> rhs)
1043 return lhs = lhs + rhs;
1046 RValue<Byte> operator-=(const Byte &lhs, RValue<Byte> rhs)
1048 return lhs = lhs - rhs;
1051 RValue<Byte> operator*=(const Byte &lhs, RValue<Byte> rhs)
1053 return lhs = lhs * rhs;
1056 RValue<Byte> operator/=(const Byte &lhs, RValue<Byte> rhs)
1058 return lhs = lhs / rhs;
1061 RValue<Byte> operator%=(const Byte &lhs, RValue<Byte> rhs)
1063 return lhs = lhs % rhs;
1066 RValue<Byte> operator&=(const Byte &lhs, RValue<Byte> rhs)
1068 return lhs = lhs & rhs;
1071 RValue<Byte> operator|=(const Byte &lhs, RValue<Byte> rhs)
1073 return lhs = lhs | rhs;
1076 RValue<Byte> operator^=(const Byte &lhs, RValue<Byte> rhs)
1078 return lhs = lhs ^ rhs;
1081 RValue<Byte> operator<<=(const Byte &lhs, RValue<Byte> rhs)
1083 return lhs = lhs << rhs;
1086 RValue<Byte> operator>>=(const Byte &lhs, RValue<Byte> rhs)
1088 return lhs = lhs >> rhs;
// Byte unary operators and increment/decrement.
// NOTE(review): the body of unary plus and the return statements of the four
// increment/decrement operators are not visible in this view.

// Unary plus.
1091 RValue<Byte> operator+(RValue<Byte> val)
// Unary minus, emitted as an integer Neg.
1096 RValue<Byte> operator-(RValue<Byte> val)
1098 return RValue<Byte>(Nucleus::createNeg(val.value));
// Bitwise complement.
1101 RValue<Byte> operator~(RValue<Byte> val)
1103 return RValue<Byte>(Nucleus::createNot(val.value));
// `res` captures the value before the update; the variable is then stored with value + 1.
1106 RValue<Byte> operator++(const Byte &val, int) // Post-increment
1108 RValue<Byte> res = val;
1110 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1111 val.storeValue(inc);
// Loads the current value, adds 1 and stores it back.
1116 const Byte &operator++(const Byte &val) // Pre-increment
1118 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1119 val.storeValue(inc);
// `res` captures the value before the update; the variable is then stored with value - 1.
// (The local is named `inc` although it holds the decremented value.)
1124 RValue<Byte> operator--(const Byte &val, int) // Post-decrement
1126 RValue<Byte> res = val;
1128 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((unsigned char)1)));
1129 val.storeValue(inc);
// Loads the current value, subtracts 1 and stores it back.
1134 const Byte &operator--(const Byte &val) // Pre-decrement
1136 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((unsigned char)1)));
1137 val.storeValue(inc);
// Byte comparisons. Ordering comparisons use the unsigned predicates (ULT/ULE/UGT/UGE).
1142 RValue<Bool> operator<(RValue<Byte> lhs, RValue<Byte> rhs)
1144 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1147 RValue<Bool> operator<=(RValue<Byte> lhs, RValue<Byte> rhs)
1149 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1152 RValue<Bool> operator>(RValue<Byte> lhs, RValue<Byte> rhs)
1154 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1157 RValue<Bool> operator>=(RValue<Byte> lhs, RValue<Byte> rhs)
1159 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1162 RValue<Bool> operator!=(RValue<Byte> lhs, RValue<Byte> rhs)
1164 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1167 RValue<Bool> operator==(RValue<Byte> lhs, RValue<Byte> rhs)
1169 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
// Byte is represented by LLVM's 8-bit integer type.
1172 Type *Byte::getType()
1174 return T(llvm::Type::getInt8Ty(*::context));
// SByte constructors and assignment operators.
// NOTE(review): some statements (stores after loadValue(), returns) are not visible in this view.

// Initialises from a function argument's value.
1177 SByte::SByte(Argument<SByte> argument)
1179 storeValue(argument.value);
// Narrowing conversions: the wider integer is truncated to SByte's type and stored.
1182 SByte::SByte(RValue<Int> cast)
1184 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1186 storeValue(integer);
1189 SByte::SByte(RValue<Short> cast)
1191 Value *integer = Nucleus::createTrunc(cast.value, SByte::getType());
1193 storeValue(integer);
// Initialises from a host signed char constant.
1200 SByte::SByte(signed char x)
1202 storeValue(Nucleus::createConstantByte(x));
// Initialises from an rvalue.
1205 SByte::SByte(RValue<SByte> rhs)
1207 storeValue(rhs.value);
// Copy construction: reads the current value of rhs.
1210 SByte::SByte(const SByte &rhs)
1212 Value *value = rhs.loadValue();
// Construction from a reference: reads the referenced value.
1216 SByte::SByte(const Reference<SByte> &rhs)
1218 Value *value = rhs.loadValue();
// Assigns an rvalue.
1222 RValue<SByte> SByte::operator=(RValue<SByte> rhs) const
1224 storeValue(rhs.value);
// Assigns from another SByte; the loaded value is also the expression's result.
1229 RValue<SByte> SByte::operator=(const SByte &rhs) const
1231 Value *value = rhs.loadValue();
1234 return RValue<SByte>(value);
// Assigns from a reference; the loaded value is also the expression's result.
1237 RValue<SByte> SByte::operator=(const Reference<SByte> &rhs) const
1239 Value *value = rhs.loadValue();
1242 return RValue<SByte>(value);
// SByte binary operators. SByte uses the signed instruction variants: division is SDiv,
// remainder is SRem and right shift is the arithmetic AShr.
1245 RValue<SByte> operator+(RValue<SByte> lhs, RValue<SByte> rhs)
1247 return RValue<SByte>(Nucleus::createAdd(lhs.value, rhs.value));
1250 RValue<SByte> operator-(RValue<SByte> lhs, RValue<SByte> rhs)
1252 return RValue<SByte>(Nucleus::createSub(lhs.value, rhs.value));
1255 RValue<SByte> operator*(RValue<SByte> lhs, RValue<SByte> rhs)
1257 return RValue<SByte>(Nucleus::createMul(lhs.value, rhs.value));
1260 RValue<SByte> operator/(RValue<SByte> lhs, RValue<SByte> rhs)
1262 return RValue<SByte>(Nucleus::createSDiv(lhs.value, rhs.value));
1265 RValue<SByte> operator%(RValue<SByte> lhs, RValue<SByte> rhs)
1267 return RValue<SByte>(Nucleus::createSRem(lhs.value, rhs.value));
1270 RValue<SByte> operator&(RValue<SByte> lhs, RValue<SByte> rhs)
1272 return RValue<SByte>(Nucleus::createAnd(lhs.value, rhs.value));
1275 RValue<SByte> operator|(RValue<SByte> lhs, RValue<SByte> rhs)
1277 return RValue<SByte>(Nucleus::createOr(lhs.value, rhs.value));
1280 RValue<SByte> operator^(RValue<SByte> lhs, RValue<SByte> rhs)
1282 return RValue<SByte>(Nucleus::createXor(lhs.value, rhs.value));
1285 RValue<SByte> operator<<(RValue<SByte> lhs, RValue<SByte> rhs)
1287 return RValue<SByte>(Nucleus::createShl(lhs.value, rhs.value));
1290 RValue<SByte> operator>>(RValue<SByte> lhs, RValue<SByte> rhs)
1292 return RValue<SByte>(Nucleus::createAShr(lhs.value, rhs.value));
// SByte compound assignments. Each evaluates the matching binary operator on lhs and rhs and
// assigns the result back to lhs, returning the assignment's result.
1295 RValue<SByte> operator+=(const SByte &lhs, RValue<SByte> rhs)
1297 return lhs = lhs + rhs;
1300 RValue<SByte> operator-=(const SByte &lhs, RValue<SByte> rhs)
1302 return lhs = lhs - rhs;
1305 RValue<SByte> operator*=(const SByte &lhs, RValue<SByte> rhs)
1307 return lhs = lhs * rhs;
1310 RValue<SByte> operator/=(const SByte &lhs, RValue<SByte> rhs)
1312 return lhs = lhs / rhs;
1315 RValue<SByte> operator%=(const SByte &lhs, RValue<SByte> rhs)
1317 return lhs = lhs % rhs;
1320 RValue<SByte> operator&=(const SByte &lhs, RValue<SByte> rhs)
1322 return lhs = lhs & rhs;
1325 RValue<SByte> operator|=(const SByte &lhs, RValue<SByte> rhs)
1327 return lhs = lhs | rhs;
1330 RValue<SByte> operator^=(const SByte &lhs, RValue<SByte> rhs)
1332 return lhs = lhs ^ rhs;
1335 RValue<SByte> operator<<=(const SByte &lhs, RValue<SByte> rhs)
1337 return lhs = lhs << rhs;
1340 RValue<SByte> operator>>=(const SByte &lhs, RValue<SByte> rhs)
1342 return lhs = lhs >> rhs;
// SByte unary operators and increment/decrement.
// NOTE(review): the body of unary plus and the return statements of the four
// increment/decrement operators are not visible in this view.

// Unary plus.
1345 RValue<SByte> operator+(RValue<SByte> val)
// Unary minus, emitted as an integer Neg.
1350 RValue<SByte> operator-(RValue<SByte> val)
1352 return RValue<SByte>(Nucleus::createNeg(val.value));
// Bitwise complement.
1355 RValue<SByte> operator~(RValue<SByte> val)
1357 return RValue<SByte>(Nucleus::createNot(val.value));
// `res` captures the value before the update; the variable is then stored with value + 1.
1360 RValue<SByte> operator++(const SByte &val, int) // Post-increment
1362 RValue<SByte> res = val;
1364 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantByte((signed char)1)));
1365 val.storeValue(inc);
// Loads the current value, adds 1 and stores it back.
1370 const SByte &operator++(const SByte &val) // Pre-increment
1372 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1373 val.storeValue(inc);
// `res` captures the value before the update; the variable is then stored with value - 1.
// (The local is named `inc` although it holds the decremented value.)
1378 RValue<SByte> operator--(const SByte &val, int) // Post-decrement
1380 RValue<SByte> res = val;
1382 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantByte((signed char)1)));
1383 val.storeValue(inc);
// Loads the current value, subtracts 1 and stores it back.
1388 const SByte &operator--(const SByte &val) // Pre-decrement
1390 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantByte((signed char)1)));
1391 val.storeValue(inc);
// SByte comparisons. Ordering comparisons use the signed predicates (SLT/SLE/SGT/SGE).
1396 RValue<Bool> operator<(RValue<SByte> lhs, RValue<SByte> rhs)
1398 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1401 RValue<Bool> operator<=(RValue<SByte> lhs, RValue<SByte> rhs)
1403 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1406 RValue<Bool> operator>(RValue<SByte> lhs, RValue<SByte> rhs)
1408 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1411 RValue<Bool> operator>=(RValue<SByte> lhs, RValue<SByte> rhs)
1413 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1416 RValue<Bool> operator!=(RValue<SByte> lhs, RValue<SByte> rhs)
1418 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1421 RValue<Bool> operator==(RValue<SByte> lhs, RValue<SByte> rhs)
1423 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
// SByte is represented by LLVM's 8-bit integer type, the same LLVM type Byte uses;
// signedness is expressed only through the instructions chosen by the operators.
1426 Type *SByte::getType()
1428 return T(llvm::Type::getInt8Ty(*::context));
// Short constructors and assignment operators.
// NOTE(review): some statements (stores after loadValue(), returns) are not visible in this view.

// Initialises from a function argument's value.
1431 Short::Short(Argument<Short> argument)
1433 storeValue(argument.value);
// Narrowing conversion: the Int is truncated to Short's type and stored.
1436 Short::Short(RValue<Int> cast)
1438 Value *integer = Nucleus::createTrunc(cast.value, Short::getType());
1440 storeValue(integer);
// Initialises from a host short constant.
1447 Short::Short(short x)
1449 storeValue(Nucleus::createConstantShort(x));
// Initialises from an rvalue.
1452 Short::Short(RValue<Short> rhs)
1454 storeValue(rhs.value);
// Copy construction: reads the current value of rhs.
1457 Short::Short(const Short &rhs)
1459 Value *value = rhs.loadValue();
// Construction from a reference: reads the referenced value.
1463 Short::Short(const Reference<Short> &rhs)
1465 Value *value = rhs.loadValue();
// Assigns an rvalue.
1469 RValue<Short> Short::operator=(RValue<Short> rhs) const
1471 storeValue(rhs.value);
// Assigns from another Short; the loaded value is also the expression's result.
1476 RValue<Short> Short::operator=(const Short &rhs) const
1478 Value *value = rhs.loadValue();
1481 return RValue<Short>(value);
// Assigns from a reference; the loaded value is also the expression's result.
1484 RValue<Short> Short::operator=(const Reference<Short> &rhs) const
1486 Value *value = rhs.loadValue();
1489 return RValue<Short>(value);
// Binary operators on RValue<Short>. Each forwards both operand values to the
// matching Nucleus::create* call and wraps the result in RValue<Short>.
1492 RValue<Short> operator+(RValue<Short> lhs, RValue<Short> rhs)
1494 return RValue<Short>(Nucleus::createAdd(lhs.value, rhs.value));
1497 RValue<Short> operator-(RValue<Short> lhs, RValue<Short> rhs)
1499 return RValue<Short>(Nucleus::createSub(lhs.value, rhs.value));
1502 RValue<Short> operator*(RValue<Short> lhs, RValue<Short> rhs)
1504 return RValue<Short>(Nucleus::createMul(lhs.value, rhs.value));
// Division and remainder use the signed forms (SDiv / SRem).
1507 RValue<Short> operator/(RValue<Short> lhs, RValue<Short> rhs)
1509 return RValue<Short>(Nucleus::createSDiv(lhs.value, rhs.value));
1512 RValue<Short> operator%(RValue<Short> lhs, RValue<Short> rhs)
1514 return RValue<Short>(Nucleus::createSRem(lhs.value, rhs.value));
1517 RValue<Short> operator&(RValue<Short> lhs, RValue<Short> rhs)
1519 return RValue<Short>(Nucleus::createAnd(lhs.value, rhs.value));
1522 RValue<Short> operator|(RValue<Short> lhs, RValue<Short> rhs)
1524 return RValue<Short>(Nucleus::createOr(lhs.value, rhs.value));
1527 RValue<Short> operator^(RValue<Short> lhs, RValue<Short> rhs)
1529 return RValue<Short>(Nucleus::createXor(lhs.value, rhs.value));
1532 RValue<Short> operator<<(RValue<Short> lhs, RValue<Short> rhs)
1534 return RValue<Short>(Nucleus::createShl(lhs.value, rhs.value));
// Right shift uses the arithmetic form (AShr).
1537 RValue<Short> operator>>(RValue<Short> lhs, RValue<Short> rhs)
1539 return RValue<Short>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators for Short. Each applies the matching binary
// operator to lhs and rhs, assigns the result back to lhs and returns the
// value of that assignment. lhs is taken by const reference.
1542 RValue<Short> operator+=(const Short &lhs, RValue<Short> rhs)
1544 return lhs = lhs + rhs;
1547 RValue<Short> operator-=(const Short &lhs, RValue<Short> rhs)
1549 return lhs = lhs - rhs;
1552 RValue<Short> operator*=(const Short &lhs, RValue<Short> rhs)
1554 return lhs = lhs * rhs;
1557 RValue<Short> operator/=(const Short &lhs, RValue<Short> rhs)
1559 return lhs = lhs / rhs;
1562 RValue<Short> operator%=(const Short &lhs, RValue<Short> rhs)
1564 return lhs = lhs % rhs;
1567 RValue<Short> operator&=(const Short &lhs, RValue<Short> rhs)
1569 return lhs = lhs & rhs;
1572 RValue<Short> operator|=(const Short &lhs, RValue<Short> rhs)
1574 return lhs = lhs | rhs;
1577 RValue<Short> operator^=(const Short &lhs, RValue<Short> rhs)
1579 return lhs = lhs ^ rhs;
1582 RValue<Short> operator<<=(const Short &lhs, RValue<Short> rhs)
1584 return lhs = lhs << rhs;
1587 RValue<Short> operator>>=(const Short &lhs, RValue<Short> rhs)
1589 return lhs = lhs >> rhs;
// Unary operators on RValue<Short>.
// Unary plus: body not visible in this excerpt (presumably returns val
// unchanged — TODO confirm).
1592 RValue<Short> operator+(RValue<Short> val)
// Arithmetic negation via Nucleus::createNeg.
1597 RValue<Short> operator-(RValue<Short> val)
1599 return RValue<Short>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
1602 RValue<Short> operator~(RValue<Short> val)
1604 return RValue<Short>(Nucleus::createNot(val.value));
// Increment / decrement operators for Short.
// NOTE(review): the return statements are not visible in this excerpt, and
// the local is named 'inc' in the decrement variants as well.

// Post-increment: captures the current value in 'res', stores res + 1.
1607 RValue<Short> operator++(const Short &val, int) // Post-increment
1609 RValue<Short> res = val;
1611 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((short)1)));
1612 val.storeValue(inc);
// Pre-increment: loads the current value, adds 1 and stores it back.
1617 const Short &operator++(const Short &val) // Pre-increment
1619 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1620 val.storeValue(inc);
// Post-decrement: captures the current value in 'res', stores res - 1.
1625 RValue<Short> operator--(const Short &val, int) // Post-decrement
1627 RValue<Short> res = val;
1629 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((short)1)));
1630 val.storeValue(inc);
// Pre-decrement: loads the current value, subtracts 1 and stores it back.
1635 const Short &operator--(const Short &val) // Pre-decrement
1637 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((short)1)));
1638 val.storeValue(inc);
// Relational operators for Short. Ordering comparisons use the signed
// predicates (SLT, SLE, SGT, SGE); != and == use NE and EQ. Every result is
// wrapped in RValue<Bool>.
1643 RValue<Bool> operator<(RValue<Short> lhs, RValue<Short> rhs)
1645 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
1648 RValue<Bool> operator<=(RValue<Short> lhs, RValue<Short> rhs)
1650 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
1653 RValue<Bool> operator>(RValue<Short> lhs, RValue<Short> rhs)
1655 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
1658 RValue<Bool> operator>=(RValue<Short> lhs, RValue<Short> rhs)
1660 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
1663 RValue<Bool> operator!=(RValue<Short> lhs, RValue<Short> rhs)
1665 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1668 RValue<Bool> operator==(RValue<Short> lhs, RValue<Short> rhs)
1670 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
// Returns the type used for Short: LLVM's 16-bit integer type from the
// file-level ::context, converted with T().
1673 Type *Short::getType()
1675 return T(llvm::Type::getInt16Ty(*::context));
// Constructors for UShort.
// NOTE(review): braces and some short statements are not visible in this
// excerpt, so only the visible lines are described.

// From a function argument: stores the argument's value.
1678 UShort::UShort(Argument<UShort> argument)
1680 storeValue(argument.value);
// From a UInt r-value: truncates to the UShort type, then stores.
1683 UShort::UShort(RValue<UInt> cast)
1685 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1687 storeValue(integer);
// From an Int r-value: same truncation to the UShort type.
1690 UShort::UShort(RValue<Int> cast)
1692 Value *integer = Nucleus::createTrunc(cast.value, UShort::getType());
1694 storeValue(integer);
// From a host 'unsigned short': stores the constant built by
// Nucleus::createConstantShort.
1701 UShort::UShort(unsigned short x)
1703 storeValue(Nucleus::createConstantShort(x));
// From a UShort r-value: stores its value directly.
1706 UShort::UShort(RValue<UShort> rhs)
1708 storeValue(rhs.value);
// Copy constructor: loads the value of rhs (store presumably follows on a
// line not visible here).
1711 UShort::UShort(const UShort &rhs)
1713 Value *value = rhs.loadValue();
// From a Reference<UShort>: loads the referenced value (store presumably
// follows on a line not visible here).
1717 UShort::UShort(const Reference<UShort> &rhs)
1719 Value *value = rhs.loadValue();
// Assignment operators for UShort (const member functions that write through
// storeValue()). NOTE(review): some body statements are not visible here.

// Assign from an r-value: stores rhs.value.
1723 RValue<UShort> UShort::operator=(RValue<UShort> rhs) const
1725 storeValue(rhs.value);
// Assign from another UShort: loads its value and returns it as an r-value.
1730 RValue<UShort> UShort::operator=(const UShort &rhs) const
1732 Value *value = rhs.loadValue();
1735 return RValue<UShort>(value);
// Assign from a Reference<UShort>: loads the referenced value and returns it.
1738 RValue<UShort> UShort::operator=(const Reference<UShort> &rhs) const
1740 Value *value = rhs.loadValue();
1743 return RValue<UShort>(value);
// Binary operators on RValue<UShort>. Each forwards both operand values to the
// matching Nucleus::create* call and wraps the result in RValue<UShort>.
1746 RValue<UShort> operator+(RValue<UShort> lhs, RValue<UShort> rhs)
1748 return RValue<UShort>(Nucleus::createAdd(lhs.value, rhs.value));
1751 RValue<UShort> operator-(RValue<UShort> lhs, RValue<UShort> rhs)
1753 return RValue<UShort>(Nucleus::createSub(lhs.value, rhs.value));
1756 RValue<UShort> operator*(RValue<UShort> lhs, RValue<UShort> rhs)
1758 return RValue<UShort>(Nucleus::createMul(lhs.value, rhs.value));
// Division and remainder use the unsigned forms (UDiv / URem).
1761 RValue<UShort> operator/(RValue<UShort> lhs, RValue<UShort> rhs)
1763 return RValue<UShort>(Nucleus::createUDiv(lhs.value, rhs.value));
1766 RValue<UShort> operator%(RValue<UShort> lhs, RValue<UShort> rhs)
1768 return RValue<UShort>(Nucleus::createURem(lhs.value, rhs.value));
1771 RValue<UShort> operator&(RValue<UShort> lhs, RValue<UShort> rhs)
1773 return RValue<UShort>(Nucleus::createAnd(lhs.value, rhs.value));
1776 RValue<UShort> operator|(RValue<UShort> lhs, RValue<UShort> rhs)
1778 return RValue<UShort>(Nucleus::createOr(lhs.value, rhs.value));
1781 RValue<UShort> operator^(RValue<UShort> lhs, RValue<UShort> rhs)
1783 return RValue<UShort>(Nucleus::createXor(lhs.value, rhs.value));
1786 RValue<UShort> operator<<(RValue<UShort> lhs, RValue<UShort> rhs)
1788 return RValue<UShort>(Nucleus::createShl(lhs.value, rhs.value));
// Right shift uses the logical form (LShr).
1791 RValue<UShort> operator>>(RValue<UShort> lhs, RValue<UShort> rhs)
1793 return RValue<UShort>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for UShort. Each applies the matching binary
// operator to lhs and rhs, assigns the result back to lhs and returns the
// value of that assignment. lhs is taken by const reference.
1796 RValue<UShort> operator+=(const UShort &lhs, RValue<UShort> rhs)
1798 return lhs = lhs + rhs;
1801 RValue<UShort> operator-=(const UShort &lhs, RValue<UShort> rhs)
1803 return lhs = lhs - rhs;
1806 RValue<UShort> operator*=(const UShort &lhs, RValue<UShort> rhs)
1808 return lhs = lhs * rhs;
1811 RValue<UShort> operator/=(const UShort &lhs, RValue<UShort> rhs)
1813 return lhs = lhs / rhs;
1816 RValue<UShort> operator%=(const UShort &lhs, RValue<UShort> rhs)
1818 return lhs = lhs % rhs;
1821 RValue<UShort> operator&=(const UShort &lhs, RValue<UShort> rhs)
1823 return lhs = lhs & rhs;
1826 RValue<UShort> operator|=(const UShort &lhs, RValue<UShort> rhs)
1828 return lhs = lhs | rhs;
1831 RValue<UShort> operator^=(const UShort &lhs, RValue<UShort> rhs)
1833 return lhs = lhs ^ rhs;
1836 RValue<UShort> operator<<=(const UShort &lhs, RValue<UShort> rhs)
1838 return lhs = lhs << rhs;
1841 RValue<UShort> operator>>=(const UShort &lhs, RValue<UShort> rhs)
1843 return lhs = lhs >> rhs;
// Unary operators on RValue<UShort>.
// Unary plus: body not visible in this excerpt (presumably returns val
// unchanged — TODO confirm).
1846 RValue<UShort> operator+(RValue<UShort> val)
// Negation via Nucleus::createNeg.
1851 RValue<UShort> operator-(RValue<UShort> val)
1853 return RValue<UShort>(Nucleus::createNeg(val.value));
// Bitwise complement via Nucleus::createNot.
1856 RValue<UShort> operator~(RValue<UShort> val)
1858 return RValue<UShort>(Nucleus::createNot(val.value));
// Increment / decrement operators for UShort.
// NOTE(review): the return statements are not visible in this excerpt, and
// the local is named 'inc' in the decrement variants as well.

// Post-increment: captures the current value in 'res', stores res + 1.
1861 RValue<UShort> operator++(const UShort &val, int) // Post-increment
1863 RValue<UShort> res = val;
1865 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1866 val.storeValue(inc);
// Pre-increment: loads the current value, adds 1 and stores it back.
1871 const UShort &operator++(const UShort &val) // Pre-increment
1873 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1874 val.storeValue(inc);
// Post-decrement: captures the current value in 'res', stores res - 1.
1879 RValue<UShort> operator--(const UShort &val, int) // Post-decrement
1881 RValue<UShort> res = val;
1883 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantShort((unsigned short)1)));
1884 val.storeValue(inc);
// Pre-decrement: loads the current value, subtracts 1 and stores it back.
1889 const UShort &operator--(const UShort &val) // Pre-decrement
1891 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantShort((unsigned short)1)));
1892 val.storeValue(inc);
// Relational operators for UShort. Ordering comparisons use the unsigned
// predicates (ULT, ULE, UGT, UGE); != and == use NE and EQ. Every result is
// wrapped in RValue<Bool>.
1897 RValue<Bool> operator<(RValue<UShort> lhs, RValue<UShort> rhs)
1899 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
1902 RValue<Bool> operator<=(RValue<UShort> lhs, RValue<UShort> rhs)
1904 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
1907 RValue<Bool> operator>(RValue<UShort> lhs, RValue<UShort> rhs)
1909 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
1912 RValue<Bool> operator>=(RValue<UShort> lhs, RValue<UShort> rhs)
1914 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
1917 RValue<Bool> operator!=(RValue<UShort> lhs, RValue<UShort> rhs)
1919 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
1922 RValue<Bool> operator==(RValue<UShort> lhs, RValue<UShort> rhs)
1924 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
// Returns the type used for UShort: LLVM's 16-bit integer type from the
// file-level ::context (the same type Short::getType() returns).
1927 Type *UShort::getType()
1929 return T(llvm::Type::getInt16Ty(*::context));
// Constructors for Byte4.

// From a Byte8: bit-casts the value to the Long type, truncates that to the
// Int type and stores the result.
1932 Byte4::Byte4(RValue<Byte8> cast)
1934 // xyzw.parent = this;
1936 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), Int::getType()));
// From a Reference<Byte4>: loads the referenced value (store presumably
// follows on a line not visible in this excerpt).
1939 Byte4::Byte4(const Reference<Byte4> &rhs)
1941 // xyzw.parent = this;
1943 Value *value = rhs.loadValue();
// Returns the type used for Byte4.
// NOTE(review): two return statements are visible; presumably a preprocessor
// conditional (not visible in this excerpt) selects between the 4 x Byte
// vector and the UInt representation — confirm which one is active.
1947 Type *Byte4::getType()
1950 return T(VectorType::get(Byte::getType(), 4));
1952 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
// Returns the type used for SByte4.
// NOTE(review): two return statements are visible; presumably a preprocessor
// conditional (not visible in this excerpt) selects between the 4 x SByte
// vector and the Int representation — confirm which one is active.
1956 Type *SByte4::getType()
1959 return T(VectorType::get(SByte::getType(), 4));
1961 return Int::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
1967 // xyzw.parent = this;
// Constructors for Byte8.
// NOTE(review): braces and some short statements are not visible in this
// excerpt, so only the visible lines are described.

// From eight host byte constants: widens them into an int64_t array, builds a
// constant vector of 8 x Byte from it, bit-casts that vector to
// Byte8::getType() and stores it.
1970 Byte8::Byte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
1972 // xyzw.parent = this;
1974 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
1975 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Byte::getType(), 8))));
1977 storeValue(Nucleus::createBitCast(vector, getType()));
// From a Byte8 r-value: stores its value directly.
1980 Byte8::Byte8(RValue<Byte8> rhs)
1982 // xyzw.parent = this;
1984 storeValue(rhs.value);
// Copy constructor: loads the value of rhs (store presumably follows on a
// line not visible here).
1987 Byte8::Byte8(const Byte8 &rhs)
1989 // xyzw.parent = this;
1991 Value *value = rhs.loadValue();
// From a Reference<Byte8>: loads the referenced value (store presumably
// follows on a line not visible here).
1995 Byte8::Byte8(const Reference<Byte8> &rhs)
1997 // xyzw.parent = this;
1999 Value *value = rhs.loadValue();
// Assignment operators for Byte8 (const member functions that write through
// storeValue()). NOTE(review): some body statements are not visible here.

// Assign from an r-value: stores rhs.value.
2003 RValue<Byte8> Byte8::operator=(RValue<Byte8> rhs) const
2005 storeValue(rhs.value);
// Assign from another Byte8: loads its value and returns it as an r-value.
2010 RValue<Byte8> Byte8::operator=(const Byte8 &rhs) const
2012 Value *value = rhs.loadValue();
2015 return RValue<Byte8>(value);
// Assign from a Reference<Byte8>: loads the referenced value and returns it.
2018 RValue<Byte8> Byte8::operator=(const Reference<Byte8> &rhs) const
2020 Value *value = rhs.loadValue();
2023 return RValue<Byte8>(value);
// Binary operators on RValue<Byte8>. Each active operator has two forms: an
// x86 helper used when CPUID::supportsMMX2() is true, and a generic
// Nucleus::create* form (presumably the else branch; the 'else' itself is not
// visible in this excerpt). Multiplication, division, remainder and the
// shifts are commented out.

// Addition: x86::paddb or Nucleus::createAdd.
2026 RValue<Byte8> operator+(RValue<Byte8> lhs, RValue<Byte8> rhs)
2028 if(CPUID::supportsMMX2())
2030 return x86::paddb(lhs, rhs);
2034 return RValue<Byte8>(Nucleus::createAdd(lhs.value, rhs.value));
// Subtraction: x86::psubb or Nucleus::createSub.
2038 RValue<Byte8> operator-(RValue<Byte8> lhs, RValue<Byte8> rhs)
2040 if(CPUID::supportsMMX2())
2042 return x86::psubb(lhs, rhs);
2046 return RValue<Byte8>(Nucleus::createSub(lhs.value, rhs.value));
2050 // RValue<Byte8> operator*(RValue<Byte8> lhs, RValue<Byte8> rhs)
2052 // return RValue<Byte8>(Nucleus::createMul(lhs.value, rhs.value));
2055 // RValue<Byte8> operator/(RValue<Byte8> lhs, RValue<Byte8> rhs)
2057 // return RValue<Byte8>(Nucleus::createUDiv(lhs.value, rhs.value));
2060 // RValue<Byte8> operator%(RValue<Byte8> lhs, RValue<Byte8> rhs)
2062 // return RValue<Byte8>(Nucleus::createURem(lhs.value, rhs.value));
// Bitwise AND/OR/XOR: the x86 form reinterprets both operands as Short4,
// applies pand/por/pxor and reinterprets the result back as Byte8.
2065 RValue<Byte8> operator&(RValue<Byte8> lhs, RValue<Byte8> rhs)
2067 if(CPUID::supportsMMX2())
2069 return As<Byte8>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
2073 return RValue<Byte8>(Nucleus::createAnd(lhs.value, rhs.value));
2077 RValue<Byte8> operator|(RValue<Byte8> lhs, RValue<Byte8> rhs)
2079 if(CPUID::supportsMMX2())
2081 return As<Byte8>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
2085 return RValue<Byte8>(Nucleus::createOr(lhs.value, rhs.value));
2089 RValue<Byte8> operator^(RValue<Byte8> lhs, RValue<Byte8> rhs)
2091 if(CPUID::supportsMMX2())
2093 return As<Byte8>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
2097 return RValue<Byte8>(Nucleus::createXor(lhs.value, rhs.value));
2101 // RValue<Byte8> operator<<(RValue<Byte8> lhs, unsigned char rhs)
2103 // return RValue<Byte8>(Nucleus::createShl(lhs.value, rhs.value));
2106 // RValue<Byte8> operator>>(RValue<Byte8> lhs, unsigned char rhs)
2108 // return RValue<Byte8>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for Byte8. Each applies the matching binary
// operator and assigns the result back to lhs. Only +=, -=, &=, |= and ^= are
// active; the variants whose binary operator is disabled are commented out.
2111 RValue<Byte8> operator+=(const Byte8 &lhs, RValue<Byte8> rhs)
2113 return lhs = lhs + rhs;
2116 RValue<Byte8> operator-=(const Byte8 &lhs, RValue<Byte8> rhs)
2118 return lhs = lhs - rhs;
2121 // RValue<Byte8> operator*=(const Byte8 &lhs, RValue<Byte8> rhs)
2123 // return lhs = lhs * rhs;
2126 // RValue<Byte8> operator/=(const Byte8 &lhs, RValue<Byte8> rhs)
2128 // return lhs = lhs / rhs;
2131 // RValue<Byte8> operator%=(const Byte8 &lhs, RValue<Byte8> rhs)
2133 // return lhs = lhs % rhs;
2136 RValue<Byte8> operator&=(const Byte8 &lhs, RValue<Byte8> rhs)
2138 return lhs = lhs & rhs;
2141 RValue<Byte8> operator|=(const Byte8 &lhs, RValue<Byte8> rhs)
2143 return lhs = lhs | rhs;
2146 RValue<Byte8> operator^=(const Byte8 &lhs, RValue<Byte8> rhs)
2148 return lhs = lhs ^ rhs;
2151 // RValue<Byte8> operator<<=(const Byte8 &lhs, RValue<Byte8> rhs)
2153 // return lhs = lhs << rhs;
2156 // RValue<Byte8> operator>>=(const Byte8 &lhs, RValue<Byte8> rhs)
2158 // return lhs = lhs >> rhs;
// Unary operators on RValue<Byte8>. Unary plus and minus are commented out.
2161 // RValue<Byte8> operator+(RValue<Byte8> val)
2166 // RValue<Byte8> operator-(RValue<Byte8> val)
2168 // return RValue<Byte8>(Nucleus::createNeg(val.value));
// Bitwise complement: when CPUID::supportsMMX2() is true, XORs val with a
// Byte8 whose eight lanes are all 0xFF; the generic Nucleus::createNot form
// is presumably the else branch (not visible in this excerpt).
2171 RValue<Byte8> operator~(RValue<Byte8> val)
2173 if(CPUID::supportsMMX2())
2175 return val ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2179 return RValue<Byte8>(Nucleus::createNot(val.value));
// Saturating add / subtract for Byte8, forwarded to x86::paddusb and
// x86::psubusb.
// NOTE(review): no CPUID check or generic fallback is visible for these two.
2183 RValue<Byte8> AddSat(RValue<Byte8> x, RValue<Byte8> y)
2185 return x86::paddusb(x, y);
2188 RValue<Byte8> SubSat(RValue<Byte8> x, RValue<Byte8> y)
2190 return x86::psubusb(x, y);
// Unpack helpers producing Short4 from byte vectors.

// Unpack(Byte4): inserts x.value as element 0 of an undefined 2 x Int vector,
// bit-casts that vector to the Byte8 type and calls UnpackLow with the same
// Byte8 value for both operands.
2193 RValue<Short4> Unpack(RValue<Byte4> x)
2195 Value *int2 = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), x.value, 0);
2196 Value *byte8 = Nucleus::createBitCast(int2, Byte8::getType());
2198 return UnpackLow(RValue<Byte8>(byte8), RValue<Byte8>(byte8));
// UnpackLow: x86::punpcklbw when CPUID::supportsMMX2() is true; the generic
// form shuffles x and y with indices {0, 8, 1, 9, 2, 10, 3, 11} and bit-casts
// the result to the Short4 type. Presumably indices 8 and above select from y
// (LLVM shufflevector convention), which interleaves the low four bytes of x
// and y — confirm against createShuffleVector.
2201 RValue<Short4> UnpackLow(RValue<Byte8> x, RValue<Byte8> y)
2203 if(CPUID::supportsMMX2())
2205 return x86::punpcklbw(x, y);
2209 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2210 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2212 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
// UnpackHigh: same structure using x86::punpckhbw, or shuffle indices
// {4, 12, 5, 13, 6, 14, 7, 15} for the generic form.
2216 RValue<Short4> UnpackHigh(RValue<Byte8> x, RValue<Byte8> y)
2218 if(CPUID::supportsMMX2())
2220 return x86::punpckhbw(x, y);
2224 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2225 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2227 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
// SignMask for Byte8: forwarded to x86::pmovmskb, returning an Int.
2231 RValue<Int> SignMask(RValue<Byte8> x)
2233 return x86::pmovmskb(x);
// The unsigned CmpGT overload is disabled (see the signedness FIXME).
2236 // RValue<Byte8> CmpGT(RValue<Byte8> x, RValue<Byte8> y)
2238 // return x86::pcmpgtb(x, y); // FIXME: Signedness
// Lane-wise equality compare, forwarded to x86::pcmpeqb.
2241 RValue<Byte8> CmpEQ(RValue<Byte8> x, RValue<Byte8> y)
2243 return x86::pcmpeqb(x, y);
// Returns the type used for Byte8: MMX::getType() when
// CPUID::supportsMMX2() is true; the 8 x Byte vector type is presumably the
// else branch (not visible in this excerpt).
2246 Type *Byte8::getType()
2248 if(CPUID::supportsMMX2())
2250 return MMX::getType();
2254 return T(VectorType::get(Byte::getType(), 8));
2260 // xyzw.parent = this;
// Constructors for SByte8.
// NOTE(review): braces and some short statements are not visible in this
// excerpt, so only the visible lines are described.

// From eight host constants: widens them into an int64_t array, builds a
// constant vector of 8 x SByte, bit-casts it to SByte8::getType() and stores
// it. NOTE(review): the parameters are declared uint8_t although the lanes
// are signed bytes.
2263 SByte8::SByte8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7)
2265 // xyzw.parent = this;
2267 int64_t constantVector[8] = {x0, x1, x2, x3, x4, x5, x6, x7};
2268 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(SByte::getType(), 8))));
2270 storeValue(Nucleus::createBitCast(vector, getType()));
// From an SByte8 r-value: stores its value directly.
2273 SByte8::SByte8(RValue<SByte8> rhs)
2275 // xyzw.parent = this;
2277 storeValue(rhs.value);
// Copy constructor: loads the value of rhs (store presumably follows on a
// line not visible here).
2280 SByte8::SByte8(const SByte8 &rhs)
2282 // xyzw.parent = this;
2284 Value *value = rhs.loadValue();
// From a Reference<SByte8>: loads the referenced value (store presumably
// follows on a line not visible here).
2288 SByte8::SByte8(const Reference<SByte8> &rhs)
2290 // xyzw.parent = this;
2292 Value *value = rhs.loadValue();
// Assignment operators for SByte8 (const member functions that write through
// storeValue()). NOTE(review): some body statements are not visible here.

// Assign from an r-value: stores rhs.value.
2296 RValue<SByte8> SByte8::operator=(RValue<SByte8> rhs) const
2298 storeValue(rhs.value);
// Assign from another SByte8: loads its value and returns it as an r-value.
2303 RValue<SByte8> SByte8::operator=(const SByte8 &rhs) const
2305 Value *value = rhs.loadValue();
2308 return RValue<SByte8>(value);
// Assign from a Reference<SByte8>: loads the referenced value and returns it.
2311 RValue<SByte8> SByte8::operator=(const Reference<SByte8> &rhs) const
2313 Value *value = rhs.loadValue();
2316 return RValue<SByte8>(value);
// Binary operators on RValue<SByte8>. Multiplication, division, remainder and
// the shifts are commented out.

// Addition / subtraction: when CPUID::supportsMMX2() is true the operands are
// reinterpreted as Byte8, passed to x86::paddb / x86::psubb and the result is
// reinterpreted back as SByte8; the generic Nucleus form is presumably the
// else branch (not visible in this excerpt).
2319 RValue<SByte8> operator+(RValue<SByte8> lhs, RValue<SByte8> rhs)
2321 if(CPUID::supportsMMX2())
2323 return As<SByte8>(x86::paddb(As<Byte8>(lhs), As<Byte8>(rhs)));
2327 return RValue<SByte8>(Nucleus::createAdd(lhs.value, rhs.value));
2331 RValue<SByte8> operator-(RValue<SByte8> lhs, RValue<SByte8> rhs)
2333 if(CPUID::supportsMMX2())
2335 return As<SByte8>(x86::psubb(As<Byte8>(lhs), As<Byte8>(rhs)));
2339 return RValue<SByte8>(Nucleus::createSub(lhs.value, rhs.value));
2343 // RValue<SByte8> operator*(RValue<SByte8> lhs, RValue<SByte8> rhs)
2345 // return RValue<SByte8>(Nucleus::createMul(lhs.value, rhs.value));
2348 // RValue<SByte8> operator/(RValue<SByte8> lhs, RValue<SByte8> rhs)
2350 // return RValue<SByte8>(Nucleus::createSDiv(lhs.value, rhs.value));
2353 // RValue<SByte8> operator%(RValue<SByte8> lhs, RValue<SByte8> rhs)
2355 // return RValue<SByte8>(Nucleus::createSRem(lhs.value, rhs.value));
// Bitwise AND/OR/XOR: only the generic Nucleus form is visible here (unlike
// the Byte8 versions, no CPUID::supportsMMX2() check appears).
2358 RValue<SByte8> operator&(RValue<SByte8> lhs, RValue<SByte8> rhs)
2360 return RValue<SByte8>(Nucleus::createAnd(lhs.value, rhs.value));
2363 RValue<SByte8> operator|(RValue<SByte8> lhs, RValue<SByte8> rhs)
2365 return RValue<SByte8>(Nucleus::createOr(lhs.value, rhs.value));
2368 RValue<SByte8> operator^(RValue<SByte8> lhs, RValue<SByte8> rhs)
2370 return RValue<SByte8>(Nucleus::createXor(lhs.value, rhs.value));
2373 // RValue<SByte8> operator<<(RValue<SByte8> lhs, unsigned char rhs)
2375 // return RValue<SByte8>(Nucleus::createShl(lhs.value, rhs.value));
2378 // RValue<SByte8> operator>>(RValue<SByte8> lhs, unsigned char rhs)
2380 // return RValue<SByte8>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators for SByte8. Each applies the matching binary
// operator and assigns the result back to lhs. Only +=, -=, &=, |= and ^= are
// active; the variants whose binary operator is disabled are commented out.
2383 RValue<SByte8> operator+=(const SByte8 &lhs, RValue<SByte8> rhs)
2385 return lhs = lhs + rhs;
2388 RValue<SByte8> operator-=(const SByte8 &lhs, RValue<SByte8> rhs)
2390 return lhs = lhs - rhs;
2393 // RValue<SByte8> operator*=(const SByte8 &lhs, RValue<SByte8> rhs)
2395 // return lhs = lhs * rhs;
2398 // RValue<SByte8> operator/=(const SByte8 &lhs, RValue<SByte8> rhs)
2400 // return lhs = lhs / rhs;
2403 // RValue<SByte8> operator%=(const SByte8 &lhs, RValue<SByte8> rhs)
2405 // return lhs = lhs % rhs;
2408 RValue<SByte8> operator&=(const SByte8 &lhs, RValue<SByte8> rhs)
2410 return lhs = lhs & rhs;
2413 RValue<SByte8> operator|=(const SByte8 &lhs, RValue<SByte8> rhs)
2415 return lhs = lhs | rhs;
2418 RValue<SByte8> operator^=(const SByte8 &lhs, RValue<SByte8> rhs)
2420 return lhs = lhs ^ rhs;
2423 // RValue<SByte8> operator<<=(const SByte8 &lhs, RValue<SByte8> rhs)
2425 // return lhs = lhs << rhs;
2428 // RValue<SByte8> operator>>=(const SByte8 &lhs, RValue<SByte8> rhs)
2430 // return lhs = lhs >> rhs;
// Unary operators on RValue<SByte8>. Unary plus and minus are commented out.
2433 // RValue<SByte8> operator+(RValue<SByte8> val)
2438 // RValue<SByte8> operator-(RValue<SByte8> val)
2440 // return RValue<SByte8>(Nucleus::createNeg(val.value));
// Bitwise complement: when CPUID::supportsMMX2() is true, XORs val with an
// SByte8 whose eight lanes are all 0xFF; the generic Nucleus::createNot form
// is presumably the else branch (not visible in this excerpt).
2443 RValue<SByte8> operator~(RValue<SByte8> val)
2445 if(CPUID::supportsMMX2())
2447 return val ^ SByte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
2451 return RValue<SByte8>(Nucleus::createNot(val.value));
// Saturating add / subtract for SByte8, forwarded to x86::paddsb and
// x86::psubsb.
// NOTE(review): no CPUID check or generic fallback is visible for these two.
2455 RValue<SByte8> AddSat(RValue<SByte8> x, RValue<SByte8> y)
2457 return x86::paddsb(x, y);
2460 RValue<SByte8> SubSat(RValue<SByte8> x, RValue<SByte8> y)
2462 return x86::psubsb(x, y);
// UnpackLow / UnpackHigh for SByte8, producing Short4.
// When CPUID::supportsMMX2() is true the operands are reinterpreted as Byte8,
// passed to x86::punpcklbw / x86::punpckhbw and the result is reinterpreted
// as Short4. The generic form (presumably the else branch, not visible here)
// shuffles x and y with the listed indices and bit-casts to the Short4 type.
2465 RValue<Short4> UnpackLow(RValue<SByte8> x, RValue<SByte8> y)
2467 if(CPUID::supportsMMX2())
2469 return As<Short4>(x86::punpcklbw(As<Byte8>(x), As<Byte8>(y)));
2473 int shuffle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
2474 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2476 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
2480 RValue<Short4> UnpackHigh(RValue<SByte8> x, RValue<SByte8> y)
2482 if(CPUID::supportsMMX2())
2484 return As<Short4>(x86::punpckhbw(As<Byte8>(x), As<Byte8>(y)));
2488 int shuffle[8] = {4, 12, 5, 13, 6, 14, 7, 15};
2489 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
2491 return RValue<Short4>(Nucleus::createBitCast(packed, Short4::getType()));
// SignMask for SByte8: reinterprets x as Byte8 and forwards to x86::pmovmskb.
2495 RValue<Int> SignMask(RValue<SByte8> x)
2497 return x86::pmovmskb(As<Byte8>(x));
// Greater-than compare on SByte8 via x86::pcmpgtb; the result type is Byte8.
2500 RValue<Byte8> CmpGT(RValue<SByte8> x, RValue<SByte8> y)
2502 return x86::pcmpgtb(x, y);
// Equality compare: operands are reinterpreted as Byte8 for x86::pcmpeqb.
2505 RValue<Byte8> CmpEQ(RValue<SByte8> x, RValue<SByte8> y)
2507 return x86::pcmpeqb(As<Byte8>(x), As<Byte8>(y));
// Returns the type used for SByte8: MMX::getType() when
// CPUID::supportsMMX2() is true; the 8 x SByte vector type is presumably the
// else branch (not visible in this excerpt).
2510 Type *SByte8::getType()
2512 if(CPUID::supportsMMX2())
2514 return MMX::getType();
2518 return T(VectorType::get(SByte::getType(), 8));
// Constructors for Byte16.
// NOTE(review): braces and some short statements are not visible here.

// From a Byte16 r-value: stores its value directly.
2522 Byte16::Byte16(RValue<Byte16> rhs)
2524 // xyzw.parent = this;
2526 storeValue(rhs.value);
// Copy constructor: loads the value of rhs (store presumably follows on a
// line not visible here).
2529 Byte16::Byte16(const Byte16 &rhs)
2531 // xyzw.parent = this;
2533 Value *value = rhs.loadValue();
// From a Reference<Byte16>: loads the referenced value (store presumably
// follows on a line not visible here).
2537 Byte16::Byte16(const Reference<Byte16> &rhs)
2539 // xyzw.parent = this;
2541 Value *value = rhs.loadValue();
// Assignment operators for Byte16 (const member functions that write through
// storeValue()). NOTE(review): some body statements are not visible here.

// Assign from an r-value: stores rhs.value.
2545 RValue<Byte16> Byte16::operator=(RValue<Byte16> rhs) const
2547 storeValue(rhs.value);
// Assign from another Byte16: loads its value and returns it as an r-value.
2552 RValue<Byte16> Byte16::operator=(const Byte16 &rhs) const
2554 Value *value = rhs.loadValue();
2557 return RValue<Byte16>(value);
// Assign from a Reference<Byte16>: loads the referenced value and returns it.
2560 RValue<Byte16> Byte16::operator=(const Reference<Byte16> &rhs) const
2562 Value *value = rhs.loadValue();
2565 return RValue<Byte16>(value);
// Returns the type used for Byte16: a vector of 16 elements of the Byte type.
2568 Type *Byte16::getType()
2570 return T(VectorType::get(Byte::getType(), 16));
// Returns the type used for SByte16: a vector of 16 elements of the SByte
// type.
2573 Type *SByte16::getType()
2575 return T( VectorType::get(SByte::getType(), 16));
// Constructs a Short2 from a Short4: bit-casts the value to the Long type,
// truncates that to the UInt type and stores the result.
2578 Short2::Short2(RValue<Short4> cast)
2580 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
// Returns the type used for Short2.
// NOTE(review): two return statements are visible; presumably a preprocessor
// conditional (not visible in this excerpt) selects between the 2 x Short
// vector and the UInt representation — confirm which one is active.
2583 Type *Short2::getType()
2586 return T(VectorType::get(Short::getType(), 2));
2588 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
// Constructs a UShort2 from a UShort4: bit-casts the value to the Long type,
// truncates that to the UInt type and stores the result.
2592 UShort2::UShort2(RValue<UShort4> cast)
2594 storeValue(Nucleus::createTrunc(Nucleus::createBitCast(cast.value, Long::getType()), UInt::getType()));
// Returns the type used for UShort2.
// NOTE(review): two return statements are visible; presumably a preprocessor
// conditional (not visible in this excerpt) selects between the 2 x UShort
// vector and the UInt representation — confirm which one is active.
2597 Type *UShort2::getType()
2600 return T(VectorType::get(UShort::getType(), 2));
2602 return UInt::getType(); // FIXME: LLVM doesn't manipulate it as one 32-bit block
// Constructs a Short4 from an Int: zero-extends the value to the Long type,
// wraps that value as an RValue<Short4>, applies Swizzle with selector 0x00
// and stores the result. Presumably selector 0x00 replicates lane 0 into all
// four lanes — confirm against Swizzle.
2606 Short4::Short4(RValue<Int> cast)
2608 Value *extend = Nucleus::createZExt(cast.value, Long::getType());
2609 Value *swizzle = Swizzle(RValue<Short4>(extend), 0x00).value;
2611 storeValue(swizzle);
// Constructs a Short4 from an Int4.
// NOTE(review): several lines of this body (declarations, #else/#endif,
// else, the final store) are not visible in this excerpt; the comments below
// describe only what the visible lines do.
2614 Short4::Short4(RValue<Int4> cast)
// Reinterpret the four 32-bit lanes as eight 16-bit lanes.
2616 Value *short8 = Nucleus::createBitCast(cast.value, Short8::getType());
// Disabled variant: a single shuffle picking elements 0, 2, 4 and 6.
2618 #if 0 // FIXME: Check codegen (pshuflw phshufhw pshufd)
2620 pack[0] = Nucleus::createConstantInt(0);
2621 pack[1] = Nucleus::createConstantInt(2);
2622 pack[2] = Nucleus::createConstantInt(4);
2623 pack[3] = Nucleus::createConstantInt(6);
2625 Value *short4 = Nucleus::createShuffleVector(short8, short8, Nucleus::createConstantVector(pack, 4));
2629 // FIXME: Use Swizzle<Short8>
// Without SSSE3: two 16-bit shuffles (masks named after pshuflw / pshufhw),
// then a bit-cast to the Int4 type and createSwizzle4 with selector 0x88.
2630 if(!CPUID::supportsSSSE3())
2632 int pshuflw[8] = {0, 2, 0, 2, 4, 5, 6, 7};
2633 int pshufhw[8] = {0, 1, 2, 3, 4, 6, 4, 6};
2635 Value *shuffle1 = Nucleus::createShuffleVector(short8, short8, pshuflw);
2636 Value *shuffle2 = Nucleus::createShuffleVector(shuffle1, shuffle1, pshufhw);
2637 Value *int4 = Nucleus::createBitCast(shuffle2, Int4::getType());
2638 packed = createSwizzle4(int4, 0x88);
// Presumably the else (SSSE3) branch: one byte-level shuffle (mask named
// pshufb) that selects byte pairs 0-1, 4-5, 8-9 and 12-13, twice over.
2642 int pshufb[16] = {0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13};
2643 Value *byte16 = Nucleus::createBitCast(cast.value, Byte16::getType());
2644 packed = Nucleus::createShuffleVector(byte16, byte16, pshufb);
// Disabled variant: extract element 0 of a 2 x Long view of 'packed'.
2647 #if 0 // FIXME: No optimal instruction selection
2648 Value *qword2 = Nucleus::createBitCast(packed, T(VectorType::get(Long::getType(), 2)));
2649 Value *element = Nucleus::createExtractElement(qword2, 0);
2650 Value *short4 = Nucleus::createBitCast(element, Short4::getType());
// Active variant: convert 'packed' through Int4 -> Int2 and bit-cast the
// result to the Short4 type.
2651 #else // FIXME: Requires SSE
2652 Value *int2 = RValue<Int2>(Int2(RValue<Int4>(packed))).value;
2653 Value *short4 = Nucleus::createBitCast(int2, Short4::getType());
2660 // Short4::Short4(RValue<Float> cast)
// Constructs a Short4 from a Float4: converts cast to Int4, passes that
// vector twice to x86::packssdw and reinterprets the result as Int4, then
// stores the value of As<Short4>(Int2(v4i32)).
// NOTE(review): no CPUID check is visible around the x86::packssdw call.
2664 Short4::Short4(RValue<Float4> cast)
2666 Int4 v4i32 = Int4(cast);
2667 v4i32 = As<Int4>(x86::packssdw(v4i32, v4i32));
2669 storeValue(As<Short4>(Int2(v4i32)).value);
2674 // xyzw.parent = this;
// Constant constructors for Short4.

// Broadcast: fills all four lanes with the same host 'short', builds a
// constant 4 x Short vector, bit-casts it to Short4::getType() and stores it.
2677 Short4::Short4(short xyzw)
2679 // xyzw.parent = this;
2681 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
2682 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2684 storeValue(Nucleus::createBitCast(vector, getType()));
// Per-lane: same construction with four independent constants x, y, z, w.
2687 Short4::Short4(short x, short y, short z, short w)
2689 // xyzw.parent = this;
2691 int64_t constantVector[4] = {x, y, z, w};
2692 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Short::getType(), 4))));
2694 storeValue(Nucleus::createBitCast(vector, getType()));
// Copy and converting constructors for Short4.
// NOTE(review): braces and some short statements are not visible here.

// From a Short4 r-value: stores its value directly.
2697 Short4::Short4(RValue<Short4> rhs)
2699 // xyzw.parent = this;
2701 storeValue(rhs.value);
// Copy constructor: loads the value of rhs (store presumably follows on a
// line not visible here).
2704 Short4::Short4(const Short4 &rhs)
2706 // xyzw.parent = this;
2708 Value *value = rhs.loadValue();
// From a Reference<Short4>: loads the referenced value (store presumably
// follows on a line not visible here).
2712 Short4::Short4(const Reference<Short4> &rhs)
2714 // xyzw.parent = this;
2716 Value *value = rhs.loadValue();
// From UShort4 (r-value, object or reference): the underlying value is
// stored as-is, with no conversion instruction emitted.
2720 Short4::Short4(RValue<UShort4> rhs)
2722 // xyzw.parent = this;
2724 storeValue(rhs.value);
2727 Short4::Short4(const UShort4 &rhs)
2729 // xyzw.parent = this;
2731 storeValue(rhs.loadValue());
2734 Short4::Short4(const Reference<UShort4> &rhs)
2736 // xyzw.parent = this;
2738 storeValue(rhs.loadValue());
// Assignment operators for Short4 (const member functions that write through
// storeValue()). NOTE(review): some body statements are not visible here.

// Assign from a Short4 r-value: stores rhs.value.
2741 RValue<Short4> Short4::operator=(RValue<Short4> rhs) const
2743 storeValue(rhs.value);
// Assign from another Short4: loads its value and returns it as an r-value.
2748 RValue<Short4> Short4::operator=(const Short4 &rhs) const
2750 Value *value = rhs.loadValue();
2753 return RValue<Short4>(value);
// Assign from a Reference<Short4>: loads the referenced value and returns it.
2756 RValue<Short4> Short4::operator=(const Reference<Short4> &rhs) const
2758 Value *value = rhs.loadValue();
2761 return RValue<Short4>(value);
// Assign from a UShort4 r-value: stores the value unchanged and returns it
// converted to RValue<Short4>.
2764 RValue<Short4> Short4::operator=(RValue<UShort4> rhs) const
2766 storeValue(rhs.value);
2768 return RValue<Short4>(rhs);
// Assign from a UShort4 object / reference: loads the value and returns it
// as RValue<Short4>.
2771 RValue<Short4> Short4::operator=(const UShort4 &rhs) const
2773 Value *value = rhs.loadValue();
2776 return RValue<Short4>(value);
2779 RValue<Short4> Short4::operator=(const Reference<UShort4> &rhs) const
2781 Value *value = rhs.loadValue();
2784 return RValue<Short4>(value);
// Arithmetic and bitwise binary operators on RValue<Short4>. Each active
// operator has an x86 form used when CPUID::supportsMMX2() is true and a
// generic Nucleus::create* form (presumably the else branch; the 'else' is
// not visible in this excerpt). Division and remainder are commented out.

// Addition: x86::paddw or Nucleus::createAdd.
2787 RValue<Short4> operator+(RValue<Short4> lhs, RValue<Short4> rhs)
2789 if(CPUID::supportsMMX2())
2791 return x86::paddw(lhs, rhs);
2795 return RValue<Short4>(Nucleus::createAdd(lhs.value, rhs.value));
// Subtraction: x86::psubw or Nucleus::createSub.
2799 RValue<Short4> operator-(RValue<Short4> lhs, RValue<Short4> rhs)
2801 if(CPUID::supportsMMX2())
2803 return x86::psubw(lhs, rhs);
2807 return RValue<Short4>(Nucleus::createSub(lhs.value, rhs.value));
// Multiplication: x86::pmullw or Nucleus::createMul.
2811 RValue<Short4> operator*(RValue<Short4> lhs, RValue<Short4> rhs)
2813 if(CPUID::supportsMMX2())
2815 return x86::pmullw(lhs, rhs);
2819 return RValue<Short4>(Nucleus::createMul(lhs.value, rhs.value));
2823 // RValue<Short4> operator/(RValue<Short4> lhs, RValue<Short4> rhs)
2825 // return RValue<Short4>(Nucleus::createSDiv(lhs.value, rhs.value));
2828 // RValue<Short4> operator%(RValue<Short4> lhs, RValue<Short4> rhs)
2830 // return RValue<Short4>(Nucleus::createSRem(lhs.value, rhs.value));
// Bitwise AND: x86::pand or Nucleus::createAnd.
2833 RValue<Short4> operator&(RValue<Short4> lhs, RValue<Short4> rhs)
2835 if(CPUID::supportsMMX2())
2837 return x86::pand(lhs, rhs);
2841 return RValue<Short4>(Nucleus::createAnd(lhs.value, rhs.value));
// Bitwise OR: x86::por or Nucleus::createOr.
2845 RValue<Short4> operator|(RValue<Short4> lhs, RValue<Short4> rhs)
2847 if(CPUID::supportsMMX2())
2849 return x86::por(lhs, rhs);
2853 return RValue<Short4>(Nucleus::createOr(lhs.value, rhs.value));
// Bitwise XOR: x86::pxor or Nucleus::createXor.
2857 RValue<Short4> operator^(RValue<Short4> lhs, RValue<Short4> rhs)
2859 if(CPUID::supportsMMX2())
2861 return x86::pxor(lhs, rhs);
2865 return RValue<Short4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift operators on RValue<Short4>. The shift amount is either a host
// 'unsigned char' or a Long1 r-value. All four forward to x86::psllw (left)
// or x86::psraw (right); the generic Nucleus forms are commented out.
// NOTE(review): no CPUID check is visible for these operators.
2869 RValue<Short4> operator<<(RValue<Short4> lhs, unsigned char rhs)
2871 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2873 return x86::psllw(lhs, rhs);
2876 RValue<Short4> operator>>(RValue<Short4> lhs, unsigned char rhs)
2878 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2880 return x86::psraw(lhs, rhs);
2883 RValue<Short4> operator<<(RValue<Short4> lhs, RValue<Long1> rhs)
2885 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
2887 return x86::psllw(lhs, rhs);
2890 RValue<Short4> operator>>(RValue<Short4> lhs, RValue<Long1> rhs)
2892 // return RValue<Short4>(Nucleus::createAShr(lhs.value, rhs.value));
2894 return x86::psraw(lhs, rhs);
// Compound assignment operators for Short4. Each applies the matching binary
// operator and assigns the result back to lhs. /= and %= are commented out;
// the shift-assign operators exist for both shift-amount types.
2897 RValue<Short4> operator+=(const Short4 &lhs, RValue<Short4> rhs)
2899 return lhs = lhs + rhs;
2902 RValue<Short4> operator-=(const Short4 &lhs, RValue<Short4> rhs)
2904 return lhs = lhs - rhs;
2907 RValue<Short4> operator*=(const Short4 &lhs, RValue<Short4> rhs)
2909 return lhs = lhs * rhs;
2912 // RValue<Short4> operator/=(const Short4 &lhs, RValue<Short4> rhs)
2914 // return lhs = lhs / rhs;
2917 // RValue<Short4> operator%=(const Short4 &lhs, RValue<Short4> rhs)
2919 // return lhs = lhs % rhs;
2922 RValue<Short4> operator&=(const Short4 &lhs, RValue<Short4> rhs)
2924 return lhs = lhs & rhs;
2927 RValue<Short4> operator|=(const Short4 &lhs, RValue<Short4> rhs)
2929 return lhs = lhs | rhs;
2932 RValue<Short4> operator^=(const Short4 &lhs, RValue<Short4> rhs)
2934 return lhs = lhs ^ rhs;
2937 RValue<Short4> operator<<=(const Short4 &lhs, unsigned char rhs)
2939 return lhs = lhs << rhs;
2942 RValue<Short4> operator>>=(const Short4 &lhs, unsigned char rhs)
2944 return lhs = lhs >> rhs;
2947 RValue<Short4> operator<<=(const Short4 &lhs, RValue<Long1> rhs)
2949 return lhs = lhs << rhs;
2952 RValue<Short4> operator>>=(const Short4 &lhs, RValue<Long1> rhs)
2954 return lhs = lhs >> rhs;
// Unary operators on RValue<Short4>. Unary plus is commented out.
2957 // RValue<Short4> operator+(RValue<Short4> val)
// Negation: when CPUID::supportsMMX2() is true, computes 0 - val lane-wise;
// Nucleus::createNeg is presumably the else branch (not visible here).
2962 RValue<Short4> operator-(RValue<Short4> val)
2964 if(CPUID::supportsMMX2())
2966 return Short4(0, 0, 0, 0) - val;
2970 return RValue<Short4>(Nucleus::createNeg(val.value));
// Bitwise complement: when CPUID::supportsMMX2() is true, XORs val with a
// Short4 built from four 0xFFFFu constants; Nucleus::createNot is presumably
// the else branch (not visible here).
// NOTE(review): 0xFFFFu is passed to 'short' parameters, so it relies on an
// unsigned-to-short narrowing conversion.
2974 RValue<Short4> operator~(RValue<Short4> val)
2976 if(CPUID::supportsMMX2())
2978 return val ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu);
2982 return RValue<Short4>(Nucleus::createNot(val.value));
// Converts a Float4 to Short4: x86::cvtps2dq produces an Int4, which is
// passed twice to x86::packssdw to form a Short8; the result is reinterpreted
// as Int4, converted to Int2 and reinterpreted as Short4.
// NOTE(review): no CPUID check is visible around the x86 calls.
2986 RValue<Short4> RoundShort4(RValue<Float4> cast)
2988 RValue<Int4> v4i32 = x86::cvtps2dq(cast);
2989 RValue<Short8> v8i16 = x86::packssdw(v4i32, v4i32);
2991 return As<Short4>(Int2(As<Int4>(v8i16)));
// Short4 helpers that forward directly to a single x86:: function. No CPUID
// check or generic fallback is visible for any of them.

// Max / Min via x86::pmaxsw / x86::pminsw.
2994 RValue<Short4> Max(RValue<Short4> x, RValue<Short4> y)
2996 return x86::pmaxsw(x, y);
2999 RValue<Short4> Min(RValue<Short4> x, RValue<Short4> y)
3001 return x86::pminsw(x, y);
// Saturating add / subtract via x86::paddsw / x86::psubsw.
3004 RValue<Short4> AddSat(RValue<Short4> x, RValue<Short4> y)
3006 return x86::paddsw(x, y);
3009 RValue<Short4> SubSat(RValue<Short4> x, RValue<Short4> y)
3011 return x86::psubsw(x, y);
// MulHigh via x86::pmulhw.
3014 RValue<Short4> MulHigh(RValue<Short4> x, RValue<Short4> y)
3016 return x86::pmulhw(x, y);
// MulAdd via x86::pmaddwd; the result type is Int2.
3019 RValue<Int2> MulAdd(RValue<Short4> x, RValue<Short4> y)
3021 return x86::pmaddwd(x, y);
// Pack via x86::packsswb; the result type is SByte8.
3024 RValue<SByte8> Pack(RValue<Short4> x, RValue<Short4> y)
3026 return x86::packsswb(x, y);
// Unpacks x and y into an Int2. When CPUID::supportsMMX2() holds it returns
// x86::punpcklwd(x, y); the remaining lines shuffle x and y with indices {0, 4, 1, 5}
// and bitcast the shuffled vector to Int2::getType().
3029 RValue<Int2> UnpackLow(RValue<Short4> x, RValue<Short4> y)
3031 if(CPUID::supportsMMX2())
3033 return x86::punpcklwd(x, y);
3037 int shuffle[4] = {0, 4, 1, 5};
3038 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
3040 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
// Unpacks x and y into an Int2. When CPUID::supportsMMX2() holds it returns
// x86::punpckhwd(x, y); the remaining lines shuffle x and y with indices {2, 6, 3, 7}
// and bitcast the shuffled vector to Int2::getType().
3044 RValue<Int2> UnpackHigh(RValue<Short4> x, RValue<Short4> y)
3046 if(CPUID::supportsMMX2())
3048 return x86::punpckhwd(x, y);
3052 int shuffle[4] = {2, 6, 3, 7};
3053 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
3055 return RValue<Int2>(Nucleus::createBitCast(packed, Int2::getType()));
// Rearranges the elements of x according to select. Uses x86::pshufw when
// CPUID::supportsMMX2() holds; the other return goes through createSwizzle4.
3059 RValue<Short4> Swizzle(RValue<Short4> x, unsigned char select)
3061 if(CPUID::supportsMMX2())
3063 return x86::pshufw(x, select);
3067 return RValue<Short4>(createSwizzle4(x.value, select));
// Returns val with element i replaced by element. Uses x86::pinsrw (with element
// converted to Int) when CPUID::supportsMMX2() holds; the other return emits
// Nucleus::createInsertElement.
3071 RValue<Short4> Insert(RValue<Short4> val, RValue<Short> element, int i)
3073 if(CPUID::supportsMMX2())
3075 return x86::pinsrw(val, Int(element), i);
3079 return RValue<Short4>(Nucleus::createInsertElement(val.value, element.value, i));
// Returns element i of val as a Short. Uses x86::pextrw (converted to Short) when
// CPUID::supportsMMX2() holds; the other return emits Nucleus::createExtractElement.
3083 RValue<Short> Extract(RValue<Short4> val, int i)
3085 if(CPUID::supportsMMX2())
3087 return Short(x86::pextrw(val, i));
3091 return RValue<Short>(Nucleus::createExtractElement(val.value, Short::getType(), i));
// Greater-than comparison of two Short4 values, delegated to x86::pcmpgtw.
3095 RValue<Short4> CmpGT(RValue<Short4> x, RValue<Short4> y)
3097 return x86::pcmpgtw(x, y);
// Equality comparison of two Short4 values, delegated to x86::pcmpeqw.
3100 RValue<Short4> CmpEQ(RValue<Short4> x, RValue<Short4> y)
3102 return x86::pcmpeqw(x, y);
// Type used to represent Short4: MMX::getType() when CPUID::supportsMMX2() holds,
// otherwise a 4-element vector of Short::getType().
3105 Type *Short4::getType()
3107 if(CPUID::supportsMMX2())
3109 return MMX::getType();
3113 return T(VectorType::get(Short::getType(), 4));
// Constructs a UShort4 from an Int4 by assigning the Short4 conversion of cast to *this.
3117 UShort4::UShort4(RValue<Int4> cast)
3119 *this = Short4(cast);
// Constructs a UShort4 from a Float4. The clamping lines limit cast to at most 0xFFFF,
// and additionally to at least 0x0000 when SSE4.1 is unavailable (with SSE4.1 the lower
// bound is left to packusdw, as the inline comment notes). The final assignment uses
// Short4(Int4(int4)) when saturate is false or SSE4.1 is missing, and x86::packusdw
// otherwise.
3122 UShort4::UShort4(RValue<Float4> cast, bool saturate)
3128 if(CPUID::supportsSSE4_1())
3130 sat = Min(cast, Float4(0xFFFF)); // packusdw takes care of 0x0000 saturation
3134 sat = Max(Min(cast, Float4(0xFFFF)), Float4(0x0000));
3144 if(!saturate || !CPUID::supportsSSE4_1())
3146 *this = Short4(Int4(int4));
3150 *this = As<Short4>(Int2(As<Int4>(x86::packusdw(As<UInt4>(int4), As<UInt4>(int4)))));
3156 // xyzw.parent = this;
// Broadcast constructor: builds a constant 4-element UShort vector with xyzw in every
// element, bitcasts it to UShort4::getType() and stores it.
3159 UShort4::UShort4(unsigned short xyzw)
3161 // xyzw.parent = this;
3163 int64_t constantVector[4] = {xyzw, xyzw, xyzw, xyzw};
3164 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
3166 storeValue(Nucleus::createBitCast(vector, getType()));
// Per-element constructor: builds a constant 4-element UShort vector {x, y, z, w},
// bitcasts it to UShort4::getType() and stores it.
3169 UShort4::UShort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
3171 // xyzw.parent = this;
3173 int64_t constantVector[4] = {x, y, z, w};
3174 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UShort::getType(), 4))));
3176 storeValue(Nucleus::createBitCast(vector, getType()));
// Constructs a UShort4 from an rvalue of the same type by storing rhs.value.
3179 UShort4::UShort4(RValue<UShort4> rhs)
3181 // xyzw.parent = this;
3183 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs with loadValue().
3186 UShort4::UShort4(const UShort4 &rhs)
3188 // xyzw.parent = this;
3190 Value *value = rhs.loadValue();
// Constructs from a Reference to a UShort4: reads the referenced value with loadValue().
3194 UShort4::UShort4(const Reference<UShort4> &rhs)
3196 // xyzw.parent = this;
3198 Value *value = rhs.loadValue();
// Constructs a UShort4 from a Short4 rvalue by storing rhs.value without conversion.
3202 UShort4::UShort4(RValue<Short4> rhs)
3204 // xyzw.parent = this;
3206 storeValue(rhs.value);
// Constructs from a Short4 variable: reads the current value of rhs with loadValue().
3209 UShort4::UShort4(const Short4 &rhs)
3211 // xyzw.parent = this;
3213 Value *value = rhs.loadValue();
// Constructs from a Reference to a Short4: reads the referenced value with loadValue().
3217 UShort4::UShort4(const Reference<Short4> &rhs)
3219 // xyzw.parent = this;
3221 Value *value = rhs.loadValue();
// Assignment from a UShort4 rvalue: stores rhs.value into this variable.
3225 RValue<UShort4> UShort4::operator=(RValue<UShort4> rhs) const
3227 storeValue(rhs.value);
// Assignment from another UShort4 variable: loads the value of rhs and returns it as an RValue.
3232 RValue<UShort4> UShort4::operator=(const UShort4 &rhs) const
3234 Value *value = rhs.loadValue();
3237 return RValue<UShort4>(value);
// Assignment from a Reference to a UShort4: loads the referenced value and returns it as an RValue.
3240 RValue<UShort4> UShort4::operator=(const Reference<UShort4> &rhs) const
3242 Value *value = rhs.loadValue();
3245 return RValue<UShort4>(value);
// Assignment from a Short4 rvalue: stores rhs.value unchanged and returns rhs converted
// to RValue<UShort4>.
3248 RValue<UShort4> UShort4::operator=(RValue<Short4> rhs) const
3250 storeValue(rhs.value);
3252 return RValue<UShort4>(rhs);
// Assignment from a Short4 variable: loads the value of rhs and returns it as RValue<UShort4>.
3255 RValue<UShort4> UShort4::operator=(const Short4 &rhs) const
3257 Value *value = rhs.loadValue();
3260 return RValue<UShort4>(value);
// Assignment from a Reference to a Short4: loads the referenced value and returns it as RValue<UShort4>.
3263 RValue<UShort4> UShort4::operator=(const Reference<Short4> &rhs) const
3265 Value *value = rhs.loadValue();
3268 return RValue<UShort4>(value);
// UShort4 addition. With CPUID::supportsMMX2() the operands are reinterpreted as Short4
// and added by x86::paddw; the other return emits Nucleus::createAdd.
3271 RValue<UShort4> operator+(RValue<UShort4> lhs, RValue<UShort4> rhs)
3273 if(CPUID::supportsMMX2())
3275 return As<UShort4>(x86::paddw(As<Short4>(lhs), As<Short4>(rhs)));
3279 return RValue<UShort4>(Nucleus::createAdd(lhs.value, rhs.value));
// UShort4 subtraction. With CPUID::supportsMMX2() the operands are reinterpreted as
// Short4 and subtracted by x86::psubw; the other return emits Nucleus::createSub.
3283 RValue<UShort4> operator-(RValue<UShort4> lhs, RValue<UShort4> rhs)
3285 if(CPUID::supportsMMX2())
3287 return As<UShort4>(x86::psubw(As<Short4>(lhs), As<Short4>(rhs)));
3291 return RValue<UShort4>(Nucleus::createSub(lhs.value, rhs.value));
// UShort4 multiplication. With CPUID::supportsMMX2() the operands are reinterpreted as
// Short4 and multiplied by x86::pmullw; the other return emits Nucleus::createMul.
3295 RValue<UShort4> operator*(RValue<UShort4> lhs, RValue<UShort4> rhs)
3297 if(CPUID::supportsMMX2())
3299 return As<UShort4>(x86::pmullw(As<Short4>(lhs), As<Short4>(rhs)));
3303 return RValue<UShort4>(Nucleus::createMul(lhs.value, rhs.value));
// UShort4 bitwise AND. With CPUID::supportsMMX2() the operands are reinterpreted as
// Short4 and combined by x86::pand; the other return emits Nucleus::createAnd.
3307 RValue<UShort4> operator&(RValue<UShort4> lhs, RValue<UShort4> rhs)
3309 if(CPUID::supportsMMX2())
3311 return As<UShort4>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
3315 return RValue<UShort4>(Nucleus::createAnd(lhs.value, rhs.value));
// UShort4 bitwise OR. With CPUID::supportsMMX2() the operands are reinterpreted as
// Short4 and combined by x86::por; the other return emits Nucleus::createOr.
3319 RValue<UShort4> operator|(RValue<UShort4> lhs, RValue<UShort4> rhs)
3321 if(CPUID::supportsMMX2())
3323 return As<UShort4>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
3327 return RValue<UShort4>(Nucleus::createOr(lhs.value, rhs.value));
// UShort4 bitwise XOR. With CPUID::supportsMMX2() the operands are reinterpreted as
// Short4 and combined by x86::pxor; the other return emits Nucleus::createXor.
3331 RValue<UShort4> operator^(RValue<UShort4> lhs, RValue<UShort4> rhs)
3333 if(CPUID::supportsMMX2())
3335 return As<UShort4>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
3339 return RValue<UShort4>(Nucleus::createXor(lhs.value, rhs.value));
// UShort4 left shift by an unsigned char count: reinterprets lhs as Short4, shifts with
// x86::psllw and reinterprets the result back. The createShl line is disabled.
3343 RValue<UShort4> operator<<(RValue<UShort4> lhs, unsigned char rhs)
3345 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3347 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
// UShort4 right shift by an unsigned char count, delegated to x86::psrlw. The createLShr
// line is disabled.
3350 RValue<UShort4> operator>>(RValue<UShort4> lhs, unsigned char rhs)
3352 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3354 return x86::psrlw(lhs, rhs);
// UShort4 left shift by a Long1 count: reinterprets lhs as Short4, shifts with
// x86::psllw and reinterprets the result back. The createShl line is disabled.
3357 RValue<UShort4> operator<<(RValue<UShort4> lhs, RValue<Long1> rhs)
3359 // return RValue<Short4>(Nucleus::createShl(lhs.value, rhs.value));
3361 return As<UShort4>(x86::psllw(As<Short4>(lhs), rhs));
// UShort4 right shift by a Long1 count, delegated to x86::psrlw. The createLShr line is
// disabled.
3364 RValue<UShort4> operator>>(RValue<UShort4> lhs, RValue<Long1> rhs)
3366 // return RValue<Short4>(Nucleus::createLShr(lhs.value, rhs.value));
3368 return x86::psrlw(lhs, rhs);
// Compound left shift for UShort4 with an unsigned char count: assigns lhs << rhs to lhs.
3371 RValue<UShort4> operator<<=(const UShort4 &lhs, unsigned char rhs)
3373 return lhs = lhs << rhs;
// Compound right shift for UShort4 with an unsigned char count: assigns lhs >> rhs to lhs.
3376 RValue<UShort4> operator>>=(const UShort4 &lhs, unsigned char rhs)
3378 return lhs = lhs >> rhs;
// Compound left shift for UShort4 with a Long1 count: assigns lhs << rhs to lhs.
3381 RValue<UShort4> operator<<=(const UShort4 &lhs, RValue<Long1> rhs)
3383 return lhs = lhs << rhs;
// Compound right shift for UShort4 with a Long1 count: assigns lhs >> rhs to lhs.
3386 RValue<UShort4> operator>>=(const UShort4 &lhs, RValue<Long1> rhs)
3388 return lhs = lhs >> rhs;
// Bitwise complement of a UShort4. With CPUID::supportsMMX2() it XORs val (viewed as
// Short4) with a Short4 whose elements are all 0xFFFF; the other return emits
// Nucleus::createNot.
3391 RValue<UShort4> operator~(RValue<UShort4> val)
3393 if(CPUID::supportsMMX2())
3395 return As<UShort4>(As<Short4>(val) ^ Short4(0xFFFFu, 0xFFFFu, 0xFFFFu, 0xFFFFu));
3399 return RValue<UShort4>(Nucleus::createNot(val.value));
// Maximum of two UShort4 values built on the Short4 Max: 0x8000 is subtracted from every
// element of both operands (viewed as Short4), Max is taken, and 0x8000 is added back.
3403 RValue<UShort4> Max(RValue<UShort4> x, RValue<UShort4> y)
3405 return RValue<UShort4>(Max(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
// Minimum of two UShort4 values built on the Short4 Min: 0x8000 is subtracted from every
// element of both operands (viewed as Short4), Min is taken, and 0x8000 is added back.
3408 RValue<UShort4> Min(RValue<UShort4> x, RValue<UShort4> y)
3410 return RValue<UShort4>(Min(As<Short4>(x) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u), As<Short4>(y) - Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u)) + Short4(0x8000u, 0x8000u, 0x8000u, 0x8000u));
// Saturating add of two UShort4 values, delegated to x86::paddusw.
3413 RValue<UShort4> AddSat(RValue<UShort4> x, RValue<UShort4> y)
3415 return x86::paddusw(x, y);
// Saturating subtract of two UShort4 values, delegated to x86::psubusw.
3418 RValue<UShort4> SubSat(RValue<UShort4> x, RValue<UShort4> y)
3420 return x86::psubusw(x, y);
// High-part multiply of two UShort4 values, delegated to x86::pmulhuw.
3423 RValue<UShort4> MulHigh(RValue<UShort4> x, RValue<UShort4> y)
3425 return x86::pmulhuw(x, y);
// Average of two UShort4 values, delegated to x86::pavgw.
3428 RValue<UShort4> Average(RValue<UShort4> x, RValue<UShort4> y)
3430 return x86::pavgw(x, y);
// Packs two UShort4 values into one Byte8, delegated to x86::packuswb.
3433 RValue<Byte8> Pack(RValue<UShort4> x, RValue<UShort4> y)
3435 return x86::packuswb(x, y);
// Type used to represent UShort4: MMX::getType() when CPUID::supportsMMX2() holds,
// otherwise a 4-element vector of UShort::getType().
3438 Type *UShort4::getType()
3440 if(CPUID::supportsMMX2())
3442 return MMX::getType();
3446 return T(VectorType::get(UShort::getType(), 4));
// Per-element constructor: stores the constant vector {c0, ..., c7} created with this
// type's getType().
3450 Short8::Short8(short c0, short c1, short c2, short c3, short c4, short c5, short c6, short c7)
3452 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3453 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs a Short8 from an rvalue of the same type by storing rhs.value.
3456 Short8::Short8(RValue<Short8> rhs)
3458 storeValue(rhs.value);
// Constructs from a Reference to a Short8: reads the referenced value with loadValue().
3461 Short8::Short8(const Reference<Short8> &rhs)
3463 Value *value = rhs.loadValue();
// Builds a Short8 from two Short4 halves: lo and hi are each bitcast to Long, inserted
// as elements 0 and 1 of a two-element Long vector (initially undef), and that vector is
// bitcast to Short8::getType().
3467 Short8::Short8(RValue<Short4> lo, RValue<Short4> hi)
3469 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3470 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3472 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3473 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3474 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3475 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
// Short8 addition, emitted with Nucleus::createAdd.
3480 RValue<Short8> operator+(RValue<Short8> lhs, RValue<Short8> rhs)
3482 return RValue<Short8>(Nucleus::createAdd(lhs.value, rhs.value));
// Short8 bitwise AND, emitted with Nucleus::createAnd.
3485 RValue<Short8> operator&(RValue<Short8> lhs, RValue<Short8> rhs)
3487 return RValue<Short8>(Nucleus::createAnd(lhs.value, rhs.value));
// Short8 left shift by an unsigned char count, delegated to x86::psllw. There is no
// non-x86 path yet (see FIXME).
3490 RValue<Short8> operator<<(RValue<Short8> lhs, unsigned char rhs)
3492 return x86::psllw(lhs, rhs); // FIXME: Fallback required
// Short8 right shift by an unsigned char count, delegated to x86::psraw. There is no
// non-x86 path yet (see FIXME).
3495 RValue<Short8> operator>>(RValue<Short8> lhs, unsigned char rhs)
3497 return x86::psraw(lhs, rhs); // FIXME: Fallback required
// Multiply-add of two Short8 values producing an Int4, delegated to x86::pmaddwd. There
// is no non-x86 path yet (see FIXME).
3500 RValue<Int4> MulAdd(RValue<Short8> x, RValue<Short8> y)
3502 return x86::pmaddwd(x, y); // FIXME: Fallback required
// Absolute value of an Int4. Uses x86::pabsd when CPUID::supportsSSSE3() holds;
// the remaining lines compute (mask ^ x) - mask with mask = x >> 31.
3505 RValue<Int4> Abs(RValue<Int4> x)
3507 if(CPUID::supportsSSSE3())
3509 return x86::pabsd(x);
3513 Int4 mask = (x >> 31);
3514 return (mask ^ x) - mask;
// High-part multiply of two Short8 values, delegated to x86::pmulhw. There is no
// non-x86 path yet (see FIXME).
3518 RValue<Short8> MulHigh(RValue<Short8> x, RValue<Short8> y)
3520 return x86::pmulhw(x, y); // FIXME: Fallback required
// Type used to represent Short8: an 8-element vector of Short::getType().
3523 Type *Short8::getType()
3525 return T(VectorType::get(Short::getType(), 8));
// Per-element constructor: stores the constant vector {c0, ..., c7} created with this
// type's getType().
3528 UShort8::UShort8(unsigned short c0, unsigned short c1, unsigned short c2, unsigned short c3, unsigned short c4, unsigned short c5, unsigned short c6, unsigned short c7)
3530 int64_t constantVector[8] = {c0, c1, c2, c3, c4, c5, c6, c7};
3531 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs a UShort8 from an rvalue of the same type by storing rhs.value.
3534 UShort8::UShort8(RValue<UShort8> rhs)
3536 storeValue(rhs.value);
// Constructs from a Reference to a UShort8: reads the referenced value with loadValue().
3539 UShort8::UShort8(const Reference<UShort8> &rhs)
3541 Value *value = rhs.loadValue();
// Builds a UShort8 from two UShort4 halves: lo and hi are each bitcast to Long, inserted
// as elements 0 and 1 of a two-element Long vector (initially undef), and that vector is
// bitcast to an 8-element short vector type.
// NOTE(review): the final bitcast targets Short8::getType() rather than
// UShort8::getType(); presumably both name the same underlying vector type - confirm.
3545 UShort8::UShort8(RValue<UShort4> lo, RValue<UShort4> hi)
3547 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
3548 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
3550 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
3551 long2 = Nucleus::createInsertElement(long2, loLong, 0);
3552 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
3553 Value *short8 = Nucleus::createBitCast(long2, Short8::getType());
// Assignment from a UShort8 rvalue: stores rhs.value into this variable.
3558 RValue<UShort8> UShort8::operator=(RValue<UShort8> rhs) const
3560 storeValue(rhs.value);
// Assignment from another UShort8 variable: loads the value of rhs and returns it as an RValue.
3565 RValue<UShort8> UShort8::operator=(const UShort8 &rhs) const
3567 Value *value = rhs.loadValue();
3570 return RValue<UShort8>(value);
// Assignment from a Reference to a UShort8: loads the referenced value and returns it as an RValue.
3573 RValue<UShort8> UShort8::operator=(const Reference<UShort8> &rhs) const
3575 Value *value = rhs.loadValue();
3578 return RValue<UShort8>(value);
// UShort8 bitwise AND, emitted with Nucleus::createAnd.
3581 RValue<UShort8> operator&(RValue<UShort8> lhs, RValue<UShort8> rhs)
3583 return RValue<UShort8>(Nucleus::createAnd(lhs.value, rhs.value));
// UShort8 left shift by an unsigned char count: reinterprets lhs as Short8, shifts with
// x86::psllw and reinterprets the result back. There is no non-x86 path yet (see FIXME).
3586 RValue<UShort8> operator<<(RValue<UShort8> lhs, unsigned char rhs)
3588 return As<UShort8>(x86::psllw(As<Short8>(lhs), rhs)); // FIXME: Fallback required
// UShort8 right shift by an unsigned char count, delegated to x86::psrlw. There is no
// non-x86 path yet (see FIXME).
3591 RValue<UShort8> operator>>(RValue<UShort8> lhs, unsigned char rhs)
3593 return x86::psrlw(lhs, rhs); // FIXME: Fallback required
// UShort8 addition, emitted with Nucleus::createAdd.
3596 RValue<UShort8> operator+(RValue<UShort8> lhs, RValue<UShort8> rhs)
3598 return RValue<UShort8>(Nucleus::createAdd(lhs.value, rhs.value));
// UShort8 multiplication, emitted with Nucleus::createMul.
3601 RValue<UShort8> operator*(RValue<UShort8> lhs, RValue<UShort8> rhs)
3603 return RValue<UShort8>(Nucleus::createMul(lhs.value, rhs.value));
// Compound addition for UShort8: assigns lhs + rhs to lhs and returns the result of that assignment.
3606 RValue<UShort8> operator+=(const UShort8 &lhs, RValue<UShort8> rhs)
3608 return lhs = lhs + rhs;
// Bitwise complement of a UShort8, emitted with Nucleus::createNot.
3611 RValue<UShort8> operator~(RValue<UShort8> val)
3613 return RValue<UShort8>(Nucleus::createNot(val.value));
// Rearranges a UShort8: x is reinterpreted as Byte16, shuffled with itself using the
// pshufb index table, and the shuffled vector is reinterpreted as UShort8.
// NOTE(review): pshufb is presumably derived from select0..select7 - confirm against
// the table-building code.
3616 RValue<UShort8> Swizzle(RValue<UShort8> x, char select0, char select1, char select2, char select3, char select4, char select5, char select6, char select7)
3638 Value *byte16 = Nucleus::createBitCast(x.value, Byte16::getType());
3639 Value *shuffle = Nucleus::createShuffleVector(byte16, byte16, pshufb);
3640 Value *short8 = Nucleus::createBitCast(shuffle, UShort8::getType());
3642 return RValue<UShort8>(short8);
// High-part multiply of two UShort8 values, delegated to x86::pmulhuw. There is no
// non-x86 path yet (see FIXME).
3645 RValue<UShort8> MulHigh(RValue<UShort8> x, RValue<UShort8> y)
3647 return x86::pmulhuw(x, y); // FIXME: Fallback required
// Type used to represent UShort8: an 8-element vector of UShort::getType().
3650 Type *UShort8::getType()
3652 return T(VectorType::get(UShort::getType(), 8));
// Constructs an Int from a function argument by storing argument.value.
3655 Int::Int(Argument<Int> argument)
3657 storeValue(argument.value);
// Constructs an Int from a Byte by zero-extending it (createZExt) to Int::getType().
3660 Int::Int(RValue<Byte> cast)
3662 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3664 storeValue(integer);
// Constructs an Int from an SByte by sign-extending it (createSExt) to Int::getType().
3667 Int::Int(RValue<SByte> cast)
3669 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3671 storeValue(integer);
// Constructs an Int from a Short by sign-extending it (createSExt) to Int::getType().
3674 Int::Int(RValue<Short> cast)
3676 Value *integer = Nucleus::createSExt(cast.value, Int::getType());
3678 storeValue(integer);
// Constructs an Int from a UShort by zero-extending it (createZExt) to Int::getType().
3681 Int::Int(RValue<UShort> cast)
3683 Value *integer = Nucleus::createZExt(cast.value, Int::getType());
3685 storeValue(integer);
// Constructs an Int from element 0 of an Int2.
3688 Int::Int(RValue<Int2> cast)
3690 *this = Extract(cast, 0);
// Constructs an Int from a Long by truncating it (createTrunc) to Int::getType().
3693 Int::Int(RValue<Long> cast)
3695 Value *integer = Nucleus::createTrunc(cast.value, Int::getType());
3697 storeValue(integer);
// Constructs an Int from a Float using Nucleus::createFPToSI and stores the result.
3700 Int::Int(RValue<Float> cast)
3702 Value *integer = Nucleus::createFPToSI(cast.value, Int::getType());
3704 storeValue(integer);
3713 storeValue(Nucleus::createConstantInt(x));
// Constructs an Int from an rvalue of the same type by storing rhs.value.
3716 Int::Int(RValue<Int> rhs)
3718 storeValue(rhs.value);
// Constructs an Int from a UInt rvalue by storing rhs.value without conversion.
3721 Int::Int(RValue<UInt> rhs)
3723 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs with loadValue().
3726 Int::Int(const Int &rhs)
3728 Value *value = rhs.loadValue();
// Constructs from a Reference to an Int: reads the referenced value with loadValue().
3732 Int::Int(const Reference<Int> &rhs)
3734 Value *value = rhs.loadValue();
// Constructs from a UInt variable: reads the current value of rhs with loadValue().
3738 Int::Int(const UInt &rhs)
3740 Value *value = rhs.loadValue();
// Constructs from a Reference to a UInt: reads the referenced value with loadValue().
3744 Int::Int(const Reference<UInt> &rhs)
3746 Value *value = rhs.loadValue();
// Assignment from a C++ int: stores the constant created by Nucleus::createConstantInt(rhs)
// and returns what storeValue yields, wrapped as an RValue.
3750 RValue<Int> Int::operator=(int rhs) const
3752 return RValue<Int>(storeValue(Nucleus::createConstantInt(rhs)));
// Assignment from an Int rvalue: stores rhs.value into this variable.
3755 RValue<Int> Int::operator=(RValue<Int> rhs) const
3757 storeValue(rhs.value);
// Assignment from a UInt rvalue: stores rhs.value unchanged and returns rhs converted to RValue<Int>.
3762 RValue<Int> Int::operator=(RValue<UInt> rhs) const
3764 storeValue(rhs.value);
3766 return RValue<Int>(rhs);
// Assignment from another Int variable: loads the value of rhs and returns it as an RValue.
3769 RValue<Int> Int::operator=(const Int &rhs) const
3771 Value *value = rhs.loadValue();
3774 return RValue<Int>(value);
// Assignment from a Reference to an Int: loads the referenced value and returns it as an RValue.
3777 RValue<Int> Int::operator=(const Reference<Int> &rhs) const
3779 Value *value = rhs.loadValue();
3782 return RValue<Int>(value);
// Assignment from a UInt variable: loads the value of rhs and returns it as RValue<Int>.
3785 RValue<Int> Int::operator=(const UInt &rhs) const
3787 Value *value = rhs.loadValue();
3790 return RValue<Int>(value);
// Assignment from a Reference to a UInt: loads the referenced value and returns it as RValue<Int>.
3793 RValue<Int> Int::operator=(const Reference<UInt> &rhs) const
3795 Value *value = rhs.loadValue();
3798 return RValue<Int>(value);
// Int addition, emitted with Nucleus::createAdd.
3801 RValue<Int> operator+(RValue<Int> lhs, RValue<Int> rhs)
3803 return RValue<Int>(Nucleus::createAdd(lhs.value, rhs.value));
// Int subtraction, emitted with Nucleus::createSub.
3806 RValue<Int> operator-(RValue<Int> lhs, RValue<Int> rhs)
3808 return RValue<Int>(Nucleus::createSub(lhs.value, rhs.value));
// Int multiplication, emitted with Nucleus::createMul.
3811 RValue<Int> operator*(RValue<Int> lhs, RValue<Int> rhs)
3813 return RValue<Int>(Nucleus::createMul(lhs.value, rhs.value));
// Int division, emitted as a signed divide with Nucleus::createSDiv.
3816 RValue<Int> operator/(RValue<Int> lhs, RValue<Int> rhs)
3818 return RValue<Int>(Nucleus::createSDiv(lhs.value, rhs.value));
// Int remainder, emitted as a signed remainder with Nucleus::createSRem.
3821 RValue<Int> operator%(RValue<Int> lhs, RValue<Int> rhs)
3823 return RValue<Int>(Nucleus::createSRem(lhs.value, rhs.value));
// Int bitwise AND, emitted with Nucleus::createAnd.
3826 RValue<Int> operator&(RValue<Int> lhs, RValue<Int> rhs)
3828 return RValue<Int>(Nucleus::createAnd(lhs.value, rhs.value));
// Int bitwise OR, emitted with Nucleus::createOr.
3831 RValue<Int> operator|(RValue<Int> lhs, RValue<Int> rhs)
3833 return RValue<Int>(Nucleus::createOr(lhs.value, rhs.value));
// Int bitwise XOR, emitted with Nucleus::createXor.
3836 RValue<Int> operator^(RValue<Int> lhs, RValue<Int> rhs)
3838 return RValue<Int>(Nucleus::createXor(lhs.value, rhs.value));
// Int left shift, emitted with Nucleus::createShl.
3841 RValue<Int> operator<<(RValue<Int> lhs, RValue<Int> rhs)
3843 return RValue<Int>(Nucleus::createShl(lhs.value, rhs.value));
// Int right shift, emitted as an arithmetic shift with Nucleus::createAShr.
3846 RValue<Int> operator>>(RValue<Int> lhs, RValue<Int> rhs)
3848 return RValue<Int>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound addition for Int: assigns lhs + rhs to lhs and returns the result of that assignment.
3851 RValue<Int> operator+=(const Int &lhs, RValue<Int> rhs)
3853 return lhs = lhs + rhs;
// Compound subtraction for Int: assigns lhs - rhs to lhs and returns the result of that assignment.
3856 RValue<Int> operator-=(const Int &lhs, RValue<Int> rhs)
3858 return lhs = lhs - rhs;
// Compound multiplication for Int: assigns lhs * rhs to lhs and returns the result of that assignment.
3861 RValue<Int> operator*=(const Int &lhs, RValue<Int> rhs)
3863 return lhs = lhs * rhs;
// Compound division for Int: assigns lhs / rhs to lhs and returns the result of that assignment.
3866 RValue<Int> operator/=(const Int &lhs, RValue<Int> rhs)
3868 return lhs = lhs / rhs;
// Compound remainder for Int: assigns lhs % rhs to lhs and returns the result of that assignment.
3871 RValue<Int> operator%=(const Int &lhs, RValue<Int> rhs)
3873 return lhs = lhs % rhs;
// Compound AND for Int: assigns lhs & rhs to lhs and returns the result of that assignment.
3876 RValue<Int> operator&=(const Int &lhs, RValue<Int> rhs)
3878 return lhs = lhs & rhs;
// Compound OR for Int: assigns lhs | rhs to lhs and returns the result of that assignment.
3881 RValue<Int> operator|=(const Int &lhs, RValue<Int> rhs)
3883 return lhs = lhs | rhs;
// Compound XOR for Int: assigns lhs ^ rhs to lhs and returns the result of that assignment.
3886 RValue<Int> operator^=(const Int &lhs, RValue<Int> rhs)
3888 return lhs = lhs ^ rhs;
// Compound left shift for Int: assigns lhs << rhs to lhs and returns the result of that assignment.
3891 RValue<Int> operator<<=(const Int &lhs, RValue<Int> rhs)
3893 return lhs = lhs << rhs;
// Compound right shift for Int: assigns lhs >> rhs to lhs and returns the result of that assignment.
3896 RValue<Int> operator>>=(const Int &lhs, RValue<Int> rhs)
3898 return lhs = lhs >> rhs;
// Unary plus for an Int rvalue.
3901 RValue<Int> operator+(RValue<Int> val)
// Unary negation of an Int, emitted with Nucleus::createNeg.
3906 RValue<Int> operator-(RValue<Int> val)
3908 return RValue<Int>(Nucleus::createNeg(val.value));
// Bitwise complement of an Int, emitted with Nucleus::createNot.
3911 RValue<Int> operator~(RValue<Int> val)
3913 return RValue<Int>(Nucleus::createNot(val.value));
// Post-increment for Int: reads the current value of val into res, then stores
// res + 1 back into val.
3916 RValue<Int> operator++(const Int &val, int) // Post-increment
3918 RValue<Int> res = val;
3920 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
3921 val.storeValue(inc);
// Pre-increment for Int: loads val, adds the constant 1 and stores the sum back into val.
3926 const Int &operator++(const Int &val) // Pre-increment
3928 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
3929 val.storeValue(inc);
// Post-decrement for Int: reads the current value of val into res, then stores
// res - 1 back into val. The local is named inc although it holds the decremented value.
3934 RValue<Int> operator--(const Int &val, int) // Post-decrement
3936 RValue<Int> res = val;
3938 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
3939 val.storeValue(inc);
// Pre-decrement for Int: loads val, subtracts the constant 1 and stores the difference
// back into val. The local is named inc although it holds the decremented value.
3944 const Int &operator--(const Int &val) // Pre-decrement
3946 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
3947 val.storeValue(inc);
// Int less-than, emitted as a signed comparison with Nucleus::createICmpSLT.
3952 RValue<Bool> operator<(RValue<Int> lhs, RValue<Int> rhs)
3954 return RValue<Bool>(Nucleus::createICmpSLT(lhs.value, rhs.value));
// Int less-than-or-equal, emitted as a signed comparison with Nucleus::createICmpSLE.
3957 RValue<Bool> operator<=(RValue<Int> lhs, RValue<Int> rhs)
3959 return RValue<Bool>(Nucleus::createICmpSLE(lhs.value, rhs.value));
// Int greater-than, emitted as a signed comparison with Nucleus::createICmpSGT.
3962 RValue<Bool> operator>(RValue<Int> lhs, RValue<Int> rhs)
3964 return RValue<Bool>(Nucleus::createICmpSGT(lhs.value, rhs.value));
// Int greater-than-or-equal, emitted as a signed comparison with Nucleus::createICmpSGE.
3967 RValue<Bool> operator>=(RValue<Int> lhs, RValue<Int> rhs)
3969 return RValue<Bool>(Nucleus::createICmpSGE(lhs.value, rhs.value));
// Int inequality, emitted with Nucleus::createICmpNE.
3972 RValue<Bool> operator!=(RValue<Int> lhs, RValue<Int> rhs)
3974 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
// Int equality, emitted with Nucleus::createICmpEQ.
3977 RValue<Bool> operator==(RValue<Int> lhs, RValue<Int> rhs)
3979 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
// Maximum of two Int values: selects x when x > y, otherwise y.
3982 RValue<Int> Max(RValue<Int> x, RValue<Int> y)
3984 return IfThenElse(x > y, x, y);
// Minimum of two Int values: selects x when x < y, otherwise y.
3987 RValue<Int> Min(RValue<Int> x, RValue<Int> y)
3989 return IfThenElse(x < y, x, y);
// Clamps x to [min, max]: the lower bound is applied first (Max), then the upper bound (Min).
3992 RValue<Int> Clamp(RValue<Int> x, RValue<Int> min, RValue<Int> max)
3994 return Min(Max(x, min), max);
// Converts a Float to Int via x86::cvtss2si. The commented-out line below the return
// is a disabled IfThenElse-based formulation.
3997 RValue<Int> RoundInt(RValue<Float> cast)
3999 return x86::cvtss2si(cast);
4001 // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
// Type used to represent Int: the 32-bit integer type from llvm::Type::getInt32Ty for
// the global ::context.
4004 Type *Int::getType()
4006 return T(llvm::Type::getInt32Ty(*::context));
// Constructs a Long from an Int by sign-extending it (createSExt) to Long::getType().
4009 Long::Long(RValue<Int> cast)
4011 Value *integer = Nucleus::createSExt(cast.value, Long::getType());
4013 storeValue(integer);
// Constructs a Long from a UInt by zero-extending it (createZExt) to Long::getType().
4016 Long::Long(RValue<UInt> cast)
4018 Value *integer = Nucleus::createZExt(cast.value, Long::getType());
4020 storeValue(integer);
// Constructs a Long from an rvalue of the same type by storing rhs.value.
4027 Long::Long(RValue<Long> rhs)
4029 storeValue(rhs.value);
// Assignment from a C++ int64_t: stores the constant created by
// Nucleus::createConstantLong(rhs) and returns what storeValue yields, wrapped as an RValue.
4032 RValue<Long> Long::operator=(int64_t rhs) const
4034 return RValue<Long>(storeValue(Nucleus::createConstantLong(rhs)));
// Assignment from a Long rvalue: stores rhs.value into this variable.
4037 RValue<Long> Long::operator=(RValue<Long> rhs) const
4039 storeValue(rhs.value);
// Assignment from another Long variable: loads the value of rhs and returns it as an RValue.
4044 RValue<Long> Long::operator=(const Long &rhs) const
4046 Value *value = rhs.loadValue();
4049 return RValue<Long>(value);
// Assignment from a Reference to a Long: loads the referenced value and returns it as an RValue.
4052 RValue<Long> Long::operator=(const Reference<Long> &rhs) const
4054 Value *value = rhs.loadValue();
4057 return RValue<Long>(value);
// Long addition, emitted with Nucleus::createAdd.
4060 RValue<Long> operator+(RValue<Long> lhs, RValue<Long> rhs)
4062 return RValue<Long>(Nucleus::createAdd(lhs.value, rhs.value));
// Long subtraction, emitted with Nucleus::createSub.
4065 RValue<Long> operator-(RValue<Long> lhs, RValue<Long> rhs)
4067 return RValue<Long>(Nucleus::createSub(lhs.value, rhs.value));
// Compound addition for Long: assigns lhs + rhs to lhs and returns the result of that assignment.
4070 RValue<Long> operator+=(const Long &lhs, RValue<Long> rhs)
4072 return lhs = lhs + rhs;
// Compound subtraction for Long: assigns lhs - rhs to lhs and returns the result of that assignment.
4075 RValue<Long> operator-=(const Long &lhs, RValue<Long> rhs)
4077 return lhs = lhs - rhs;
// Atomic add of y to the Long that x points to, emitted with Nucleus::createAtomicAdd;
// returns that call's result as an RValue<Long>.
4080 RValue<Long> AddAtomic(RValue<Pointer<Long> > x, RValue<Long> y)
4082 return RValue<Long>(Nucleus::createAtomicAdd(x.value, y.value));
// Type used to represent Long: the 64-bit integer type from llvm::Type::getInt64Ty for
// the global ::context.
4085 Type *Long::getType()
4087 return T(llvm::Type::getInt64Ty(*::context));
// Constructs a Long1 from a UInt: cast goes into element 0 and the constant 0 into
// element 1 of a two-element Int vector, which is then bitcast to Long1::getType().
4090 Long1::Long1(const RValue<UInt> cast)
4092 Value *undefCast = Nucleus::createInsertElement(V(UndefValue::get(VectorType::get(Int::getType(), 2))), cast.value, 0);
4093 Value *zeroCast = Nucleus::createInsertElement(undefCast, V(Nucleus::createConstantInt(0)), 1);
4095 storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
// Constructs a Long1 from an rvalue of the same type by storing rhs.value.
4098 Long1::Long1(RValue<Long1> rhs)
4100 storeValue(rhs.value);
// Type used to represent Long1: MMX::getType() when CPUID::supportsMMX2() holds,
// otherwise a 1-element vector of Long::getType().
4103 Type *Long1::getType()
4105 if(CPUID::supportsMMX2())
4107 return MMX::getType();
4111 return T(VectorType::get(Long::getType(), 1));
// Constructs a UInt from a function argument by storing argument.value.
4115 UInt::UInt(Argument<UInt> argument)
4117 storeValue(argument.value);
// Constructs a UInt from a UShort by zero-extending it (createZExt) to UInt::getType().
4120 UInt::UInt(RValue<UShort> cast)
4122 Value *integer = Nucleus::createZExt(cast.value, UInt::getType());
4124 storeValue(integer);
// Constructs a UInt from a Long by truncating it (createTrunc) to UInt::getType().
4127 UInt::UInt(RValue<Long> cast)
4129 Value *integer = Nucleus::createTrunc(cast.value, UInt::getType());
4131 storeValue(integer);
// Constructs a UInt from a Float using only signed conversions (createFPToUI is avoided,
// per the note below). The stored expression is masked with ~(As<Int>(cast) >> 31), and
// inputs that compare >= ustartf (0x80000000 as a float) are converted after subtracting
// ustartf, with ustart added back to the integer result.
4134 UInt::UInt(RValue<Float> cast)
4136 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
4137 // Value *integer = Nucleus::createFPToUI(cast.value, UInt::getType());
4139 // Smallest positive value representable in UInt, but not in Int
4140 const unsigned int ustart = 0x80000000u;
4141 const float ustartf = float(ustart);
4143 // If the value is negative, store 0, otherwise store the result of the conversion
4144 storeValue((~(As<Int>(cast) >> 31) &
4145 // Check if the value can be represented as an Int
4146 IfThenElse(cast >= ustartf,
4147 // If the value is too large, subtract ustart and re-add it after conversion.
4148 As<Int>(As<UInt>(Int(cast - Float(ustartf))) + UInt(ustart)),
4149 // Otherwise, just convert normally
4159 storeValue(Nucleus::createConstantInt(x));
// Constructs a UInt from a C++ unsigned int by storing Nucleus::createConstantInt(x).
4162 UInt::UInt(unsigned int x)
4164 storeValue(Nucleus::createConstantInt(x));
// Constructs a UInt from an rvalue of the same type by storing rhs.value.
4167 UInt::UInt(RValue<UInt> rhs)
4169 storeValue(rhs.value);
// Constructs a UInt from an Int rvalue by storing rhs.value without conversion.
4172 UInt::UInt(RValue<Int> rhs)
4174 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs with loadValue().
4177 UInt::UInt(const UInt &rhs)
4179 Value *value = rhs.loadValue();
// Constructs from a Reference to a UInt: reads the referenced value with loadValue().
4183 UInt::UInt(const Reference<UInt> &rhs)
4185 Value *value = rhs.loadValue();
// Constructs from an Int variable: reads the current value of rhs with loadValue().
4189 UInt::UInt(const Int &rhs)
4191 Value *value = rhs.loadValue();
// Constructs from a Reference to an Int: reads the referenced value with loadValue().
4195 UInt::UInt(const Reference<Int> &rhs)
4197 Value *value = rhs.loadValue();
// Assignment from a C++ unsigned int: stores the constant created by
// Nucleus::createConstantInt(rhs) and returns what storeValue yields, wrapped as an RValue.
4201 RValue<UInt> UInt::operator=(unsigned int rhs) const
4203 return RValue<UInt>(storeValue(Nucleus::createConstantInt(rhs)));
// Assignment from a UInt rvalue: stores rhs.value into this variable.
4206 RValue<UInt> UInt::operator=(RValue<UInt> rhs) const
4208 storeValue(rhs.value);
// Assignment from an Int rvalue: stores rhs.value unchanged and returns rhs converted to RValue<UInt>.
4213 RValue<UInt> UInt::operator=(RValue<Int> rhs) const
4215 storeValue(rhs.value);
4217 return RValue<UInt>(rhs);
// Assignment from another UInt variable: loads the value of rhs and returns it as an RValue.
4220 RValue<UInt> UInt::operator=(const UInt &rhs) const
4222 Value *value = rhs.loadValue();
4225 return RValue<UInt>(value);
// Assignment from a Reference to a UInt: loads the referenced value and returns it as an RValue.
4228 RValue<UInt> UInt::operator=(const Reference<UInt> &rhs) const
4230 Value *value = rhs.loadValue();
4233 return RValue<UInt>(value);
// Assignment from an Int variable: loads the value of rhs and returns it as RValue<UInt>.
4236 RValue<UInt> UInt::operator=(const Int &rhs) const
4238 Value *value = rhs.loadValue();
4241 return RValue<UInt>(value);
// Assignment from a Reference to an Int: loads the referenced value and returns it as RValue<UInt>.
4244 RValue<UInt> UInt::operator=(const Reference<Int> &rhs) const
4246 Value *value = rhs.loadValue();
4249 return RValue<UInt>(value);
// UInt addition, emitted with Nucleus::createAdd.
4252 RValue<UInt> operator+(RValue<UInt> lhs, RValue<UInt> rhs)
4254 return RValue<UInt>(Nucleus::createAdd(lhs.value, rhs.value));
// UInt subtraction, emitted with Nucleus::createSub.
4257 RValue<UInt> operator-(RValue<UInt> lhs, RValue<UInt> rhs)
4259 return RValue<UInt>(Nucleus::createSub(lhs.value, rhs.value));
// UInt multiplication, emitted with Nucleus::createMul.
4262 RValue<UInt> operator*(RValue<UInt> lhs, RValue<UInt> rhs)
4264 return RValue<UInt>(Nucleus::createMul(lhs.value, rhs.value));
// UInt division, emitted as an unsigned divide with Nucleus::createUDiv.
4267 RValue<UInt> operator/(RValue<UInt> lhs, RValue<UInt> rhs)
4269 return RValue<UInt>(Nucleus::createUDiv(lhs.value, rhs.value));
// UInt remainder, emitted as an unsigned remainder with Nucleus::createURem.
4272 RValue<UInt> operator%(RValue<UInt> lhs, RValue<UInt> rhs)
4274 return RValue<UInt>(Nucleus::createURem(lhs.value, rhs.value));
// UInt bitwise AND, emitted with Nucleus::createAnd.
4277 RValue<UInt> operator&(RValue<UInt> lhs, RValue<UInt> rhs)
4279 return RValue<UInt>(Nucleus::createAnd(lhs.value, rhs.value));
// UInt bitwise OR, emitted with Nucleus::createOr.
4282 RValue<UInt> operator|(RValue<UInt> lhs, RValue<UInt> rhs)
4284 return RValue<UInt>(Nucleus::createOr(lhs.value, rhs.value));
// UInt bitwise XOR, emitted with Nucleus::createXor.
4287 RValue<UInt> operator^(RValue<UInt> lhs, RValue<UInt> rhs)
4289 return RValue<UInt>(Nucleus::createXor(lhs.value, rhs.value));
// UInt left shift, emitted with Nucleus::createShl.
4292 RValue<UInt> operator<<(RValue<UInt> lhs, RValue<UInt> rhs)
4294 return RValue<UInt>(Nucleus::createShl(lhs.value, rhs.value));
// UInt right shift, emitted as a logical shift with Nucleus::createLShr.
4297 RValue<UInt> operator>>(RValue<UInt> lhs, RValue<UInt> rhs)
4299 return RValue<UInt>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound addition for UInt: assigns lhs + rhs to lhs and returns the result of that assignment.
4302 RValue<UInt> operator+=(const UInt &lhs, RValue<UInt> rhs)
4304 return lhs = lhs + rhs;
// Compound subtraction for UInt: assigns lhs - rhs to lhs and returns the result of that assignment.
4307 RValue<UInt> operator-=(const UInt &lhs, RValue<UInt> rhs)
4309 return lhs = lhs - rhs;
// Compound multiplication for UInt: assigns lhs * rhs to lhs and returns the result of that assignment.
4312 RValue<UInt> operator*=(const UInt &lhs, RValue<UInt> rhs)
4314 return lhs = lhs * rhs;
// Compound division for UInt: assigns lhs / rhs to lhs and returns the result of that assignment.
4317 RValue<UInt> operator/=(const UInt &lhs, RValue<UInt> rhs)
4319 return lhs = lhs / rhs;
// Compound remainder for UInt: assigns lhs % rhs to lhs and returns the result of that assignment.
4322 RValue<UInt> operator%=(const UInt &lhs, RValue<UInt> rhs)
4324 return lhs = lhs % rhs;
// Compound AND for UInt: assigns lhs & rhs to lhs and returns the result of that assignment.
4327 RValue<UInt> operator&=(const UInt &lhs, RValue<UInt> rhs)
4329 return lhs = lhs & rhs;
// Compound OR for UInt: assigns lhs | rhs to lhs and returns the result of that assignment.
4332 RValue<UInt> operator|=(const UInt &lhs, RValue<UInt> rhs)
4334 return lhs = lhs | rhs;
// Compound XOR for UInt: assigns lhs ^ rhs to lhs and returns the result of that assignment.
4337 RValue<UInt> operator^=(const UInt &lhs, RValue<UInt> rhs)
4339 return lhs = lhs ^ rhs;
// Compound left shift for UInt: assigns lhs << rhs to lhs and returns the result of that assignment.
4342 RValue<UInt> operator<<=(const UInt &lhs, RValue<UInt> rhs)
4344 return lhs = lhs << rhs;
// Compound right shift for UInt: assigns lhs >> rhs to lhs and returns the result of that assignment.
4347 RValue<UInt> operator>>=(const UInt &lhs, RValue<UInt> rhs)
4349 return lhs = lhs >> rhs;
// Unary plus for a UInt rvalue.
4352 RValue<UInt> operator+(RValue<UInt> val)
// Unary negation of a UInt, emitted with Nucleus::createNeg.
4357 RValue<UInt> operator-(RValue<UInt> val)
4359 return RValue<UInt>(Nucleus::createNeg(val.value));
// Bitwise complement of a UInt, emitted with Nucleus::createNot.
4362 RValue<UInt> operator~(RValue<UInt> val)
4364 return RValue<UInt>(Nucleus::createNot(val.value));
// Post-increment for UInt: reads the current value of val into res, then stores
// res + 1 back into val.
4367 RValue<UInt> operator++(const UInt &val, int) // Post-increment
4369 RValue<UInt> res = val;
4371 Value *inc = Nucleus::createAdd(res.value, V(Nucleus::createConstantInt(1)));
4372 val.storeValue(inc);
// Pre-increment for UInt: loads val, adds the constant 1 and stores the sum back into val.
4377 const UInt &operator++(const UInt &val) // Pre-increment
4379 Value *inc = Nucleus::createAdd(val.loadValue(), V(Nucleus::createConstantInt(1)));
4380 val.storeValue(inc);
// Post-decrement for UInt: reads the current value of val into res, then stores
// res - 1 back into val. The local is named inc although it holds the decremented value.
4385 RValue<UInt> operator--(const UInt &val, int) // Post-decrement
4387 RValue<UInt> res = val;
4389 Value *inc = Nucleus::createSub(res.value, V(Nucleus::createConstantInt(1)));
4390 val.storeValue(inc);
// Pre-decrement for UInt: loads val, subtracts the constant 1 and stores the difference
// back into val. The local is named inc although it holds the decremented value.
4395 const UInt &operator--(const UInt &val) // Pre-decrement
4397 Value *inc = Nucleus::createSub(val.loadValue(), V(Nucleus::createConstantInt(1)));
4398 val.storeValue(inc);
// Maximum of two UInt values: selects x when x > y, otherwise y.
4403 RValue<UInt> Max(RValue<UInt> x, RValue<UInt> y)
4405 return IfThenElse(x > y, x, y);
// Minimum of two UInt values: selects x when x < y, otherwise y.
4408 RValue<UInt> Min(RValue<UInt> x, RValue<UInt> y)
4410 return IfThenElse(x < y, x, y);
// Clamps x to [min, max]: the lower bound is applied first (Max), then the upper bound (Min).
4413 RValue<UInt> Clamp(RValue<UInt> x, RValue<UInt> min, RValue<UInt> max)
4415 return Min(Max(x, min), max);
// UInt less-than, emitted as an unsigned comparison with Nucleus::createICmpULT.
4418 RValue<Bool> operator<(RValue<UInt> lhs, RValue<UInt> rhs)
4420 return RValue<Bool>(Nucleus::createICmpULT(lhs.value, rhs.value));
// UInt less-than-or-equal, emitted as an unsigned comparison with Nucleus::createICmpULE.
4423 RValue<Bool> operator<=(RValue<UInt> lhs, RValue<UInt> rhs)
4425 return RValue<Bool>(Nucleus::createICmpULE(lhs.value, rhs.value));
// UInt greater-than, emitted as an unsigned comparison with Nucleus::createICmpUGT.
4428 RValue<Bool> operator>(RValue<UInt> lhs, RValue<UInt> rhs)
4430 return RValue<Bool>(Nucleus::createICmpUGT(lhs.value, rhs.value));
// UInt greater-than-or-equal, emitted as an unsigned comparison with Nucleus::createICmpUGE.
4433 RValue<Bool> operator>=(RValue<UInt> lhs, RValue<UInt> rhs)
4435 return RValue<Bool>(Nucleus::createICmpUGE(lhs.value, rhs.value));
// UInt inequality, emitted with Nucleus::createICmpNE.
4438 RValue<Bool> operator!=(RValue<UInt> lhs, RValue<UInt> rhs)
4440 return RValue<Bool>(Nucleus::createICmpNE(lhs.value, rhs.value));
// UInt equality, emitted with Nucleus::createICmpEQ.
4443 RValue<Bool> operator==(RValue<UInt> lhs, RValue<UInt> rhs)
4445 return RValue<Bool>(Nucleus::createICmpEQ(lhs.value, rhs.value));
4448 // RValue<UInt> RoundUInt(RValue<Float> cast)
4450 // return x86::cvtss2si(val); // FIXME: Unsigned
4452 // // return IfThenElse(val > 0.0f, Int(val + 0.5f), Int(val - 0.5f));
// Type used to represent UInt: the 32-bit integer type from llvm::Type::getInt32Ty for
// the global ::context (the same call Int::getType uses).
4455 Type *UInt::getType()
4457 return T(llvm::Type::getInt32Ty(*::context));
4460 // Int2::Int2(RValue<Int> cast)
4462 // Value *extend = Nucleus::createZExt(cast.value, Long::getType());
4463 // Value *vector = Nucleus::createBitCast(extend, Int2::getType());
4465 // int shuffle[2] = {0, 0};
4466 // Value *replicate = Nucleus::createShuffleVector(vector, vector, shuffle);
4468 // storeValue(replicate);
// Constructs an Int2 from an Int4: cast is bitcast to a two-element Long vector,
// element 0 is extracted, and that element is bitcast to Int2::getType().
4471 Int2::Int2(RValue<Int4> cast)
4473 Value *long2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
4474 Value *element = Nucleus::createExtractElement(long2, Long::getType(), 0);
4475 Value *int2 = Nucleus::createBitCast(element, Int2::getType());
4482 // xy.parent = this;
// Per-element constructor: builds a constant 2-element Int vector {x, y}, bitcasts it
// to Int2::getType() and stores it.
4485 Int2::Int2(int x, int y)
4487 // xy.parent = this;
4489 int64_t constantVector[2] = {x, y};
4490 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(Int::getType(), 2))));
4492 storeValue(Nucleus::createBitCast(vector, getType()));
// Constructs an Int2 from an rvalue of the same type by storing rhs.value.
4495 Int2::Int2(RValue<Int2> rhs)
4497 // xy.parent = this;
4499 storeValue(rhs.value);
// Copy constructor: reads the current value of rhs with loadValue().
4502 Int2::Int2(const Int2 &rhs)
4504 // xy.parent = this;
4506 Value *value = rhs.loadValue();
// Constructs from a Reference to an Int2: reads the referenced value with loadValue().
4510 Int2::Int2(const Reference<Int2> &rhs)
4512 // xy.parent = this;
4514 Value *value = rhs.loadValue();
// Builds an Int2 from two Int values. With CPUID::supportsMMX2(), lo and hi are each
// widened through Long1(RValue<UInt>(...)), viewed as Int2 and combined with UnpackLow
// (punpckldq, per the inline comment). The remaining lines bitcast lo and hi to
// 1-element Int vectors, shuffle them with indices {0, 1} and bitcast the result to
// Int2::getType().
4518 Int2::Int2(RValue<Int> lo, RValue<Int> hi)
4520 if(CPUID::supportsMMX2())
4524 // punpckldq mm0, mm1
4525 storeValue(As<Int2>(UnpackLow(As<Int2>(Long1(RValue<UInt>(lo))), As<Int2>(Long1(RValue<UInt>(hi))))).value);
4529 int shuffle[2] = {0, 1};
4530 Value *packed = Nucleus::createShuffleVector(Nucleus::createBitCast(lo.value, T(VectorType::get(Int::getType(), 1))), Nucleus::createBitCast(hi.value, T(VectorType::get(Int::getType(), 1))), shuffle);
4532 storeValue(Nucleus::createBitCast(packed, Int2::getType()));
// Assignment from an Int2 rvalue: stores rhs.value into this variable.
4536 RValue<Int2> Int2::operator=(RValue<Int2> rhs) const
4538 storeValue(rhs.value);
// Assignment from another Int2 variable: loads the value of rhs and returns it as an RValue.
4543 RValue<Int2> Int2::operator=(const Int2 &rhs) const
4545 Value *value = rhs.loadValue();
4548 return RValue<Int2>(value);
// Assignment from a Reference to an Int2: loads the referenced value and returns it as an RValue.
4551 RValue<Int2> Int2::operator=(const Reference<Int2> &rhs) const
4553 Value *value = rhs.loadValue();
4556 return RValue<Int2>(value);
// Int2 addition. Uses x86::paddd when CPUID::supportsMMX2() holds; the other return
// emits Nucleus::createAdd.
4559 RValue<Int2> operator+(RValue<Int2> lhs, RValue<Int2> rhs)
4561 if(CPUID::supportsMMX2())
4563 return x86::paddd(lhs, rhs);
4567 return RValue<Int2>(Nucleus::createAdd(lhs.value, rhs.value));
// Int2 subtraction. Uses x86::psubd when CPUID::supportsMMX2() holds; the other return
// emits Nucleus::createSub.
4571 RValue<Int2> operator-(RValue<Int2> lhs, RValue<Int2> rhs)
4573 if(CPUID::supportsMMX2())
4575 return x86::psubd(lhs, rhs);
4579 return RValue<Int2>(Nucleus::createSub(lhs.value, rhs.value));
4583 // RValue<Int2> operator*(RValue<Int2> lhs, RValue<Int2> rhs)
4585 // return RValue<Int2>(Nucleus::createMul(lhs.value, rhs.value));
4588 // RValue<Int2> operator/(RValue<Int2> lhs, RValue<Int2> rhs)
4590 // return RValue<Int2>(Nucleus::createSDiv(lhs.value, rhs.value));
4593 // RValue<Int2> operator%(RValue<Int2> lhs, RValue<Int2> rhs)
4595 // return RValue<Int2>(Nucleus::createSRem(lhs.value, rhs.value));
// Int2 bitwise AND. With CPUID::supportsMMX2() the operands are reinterpreted as Short4
// and combined by x86::pand; the other return emits Nucleus::createAnd.
4598 RValue<Int2> operator&(RValue<Int2> lhs, RValue<Int2> rhs)
4600 if(CPUID::supportsMMX2())
4602 return As<Int2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4606 return RValue<Int2>(Nucleus::createAnd(lhs.value, rhs.value));
4610 RValue<Int2> operator|(RValue<Int2> lhs, RValue<Int2> rhs)
4612 if(CPUID::supportsMMX2())
4614 return As<Int2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4618 return RValue<Int2>(Nucleus::createOr(lhs.value, rhs.value));
4622 RValue<Int2> operator^(RValue<Int2> lhs, RValue<Int2> rhs)
4624 if(CPUID::supportsMMX2())
4626 return As<Int2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4630 return RValue<Int2>(Nucleus::createXor(lhs.value, rhs.value));
// Left shift of an Int2 by a compile-time (C++) shift count, via x86::pslld.
// The generic createShl form is kept only as a commented-out alternative.
4634 RValue<Int2> operator<<(RValue<Int2> lhs, unsigned char rhs)
4636 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4638 return x86::pslld(lhs, rhs);
// Right shift of an Int2 by a C++ shift count, via x86::psrad
// (the commented-out generic form is an arithmetic shift, createAShr).
4641 RValue<Int2> operator>>(RValue<Int2> lhs, unsigned char rhs)
4643 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4645 return x86::psrad(lhs, rhs);
// Left shift of an Int2 by a shift count held in a Long1 value, via x86::pslld.
4648 RValue<Int2> operator<<(RValue<Int2> lhs, RValue<Long1> rhs)
4650 // return RValue<Int2>(Nucleus::createShl(lhs.value, rhs.value));
4652 return x86::pslld(lhs, rhs);
// Right shift of an Int2 by a shift count held in a Long1 value, via x86::psrad.
4655 RValue<Int2> operator>>(RValue<Int2> lhs, RValue<Long1> rhs)
4657 // return RValue<Int2>(Nucleus::createAShr(lhs.value, rhs.value));
4659 return x86::psrad(lhs, rhs);
// Compound assignment operators for Int2.
// Each one evaluates the corresponding binary operator on (lhs, rhs), assigns the
// result back to lhs and returns the value of that assignment. lhs is taken by
// const reference; assignment works because Int2::operator= is a const member.
// The *=, /= and %= forms are commented out.
4662 RValue<Int2> operator+=(const Int2 &lhs, RValue<Int2> rhs)
4664 return lhs = lhs + rhs;
4667 RValue<Int2> operator-=(const Int2 &lhs, RValue<Int2> rhs)
4669 return lhs = lhs - rhs;
4672 // RValue<Int2> operator*=(const Int2 &lhs, RValue<Int2> rhs)
4674 // return lhs = lhs * rhs;
4677 // RValue<Int2> operator/=(const Int2 &lhs, RValue<Int2> rhs)
4679 // return lhs = lhs / rhs;
4682 // RValue<Int2> operator%=(const Int2 &lhs, RValue<Int2> rhs)
4684 // return lhs = lhs % rhs;
4687 RValue<Int2> operator&=(const Int2 &lhs, RValue<Int2> rhs)
4689 return lhs = lhs & rhs;
4692 RValue<Int2> operator|=(const Int2 &lhs, RValue<Int2> rhs)
4694 return lhs = lhs | rhs;
4697 RValue<Int2> operator^=(const Int2 &lhs, RValue<Int2> rhs)
4699 return lhs = lhs ^ rhs;
4702 RValue<Int2> operator<<=(const Int2 &lhs, unsigned char rhs)
4704 return lhs = lhs << rhs;
4707 RValue<Int2> operator>>=(const Int2 &lhs, unsigned char rhs)
4709 return lhs = lhs >> rhs;
4712 RValue<Int2> operator<<=(const Int2 &lhs, RValue<Long1> rhs)
4714 return lhs = lhs << rhs;
4717 RValue<Int2> operator>>=(const Int2 &lhs, RValue<Long1> rhs)
4719 return lhs = lhs >> rhs;
4722 // RValue<Int2> operator+(RValue<Int2> val)
4727 // RValue<Int2> operator-(RValue<Int2> val)
4729 // return RValue<Int2>(Nucleus::createNeg(val.value));
// Bitwise complement of an Int2.
// With MMX2 support it is expressed as XOR with an all-ones Int2 constant;
// the other visible return uses Nucleus::createNot.
4732 RValue<Int2> operator~(RValue<Int2> val)
4734 if(CPUID::supportsMMX2())
4736 return val ^ Int2(0xFFFFFFFF, 0xFFFFFFFF);
4740 return RValue<Int2>(Nucleus::createNot(val.value));
// Interleaves the low elements of x and y and returns the result as a Long1.
// With MMX2 support this is x86::punpckldq. The other visible path shuffles
// with indices {0, 2} (element 0 of x, then element 0 of y) and bitcasts the
// packed vector to Long1::getType().
4744 RValue<Long1> UnpackLow(RValue<Int2> x, RValue<Int2> y)
4746 if(CPUID::supportsMMX2())
4748 return x86::punpckldq(x, y);
4752 int shuffle[2] = {0, 2};
4753 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4755 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
// Interleaves the high elements of x and y and returns the result as a Long1.
// With MMX2 support this is x86::punpckhdq. The other visible path shuffles
// with indices {1, 3} (element 1 of x, then element 1 of y) and bitcasts the
// packed vector to Long1::getType().
4759 RValue<Long1> UnpackHigh(RValue<Int2> x, RValue<Int2> y)
4761 if(CPUID::supportsMMX2())
4763 return x86::punpckhdq(x, y);
4767 int shuffle[2] = {1, 3};
4768 Value *packed = Nucleus::createShuffleVector(x.value, y.value, shuffle);
4770 return RValue<Long1>(Nucleus::createBitCast(packed, Long1::getType()));
// Extracts element i of an Int2 as a scalar Int.
// NOTE(review): the conditions selecting between the last two paths are not
// visible in this listing (presumably i == 0 versus i != 0) — confirm.
4774 RValue<Int> Extract(RValue<Int2> val, int i)
// The direct extract-element form is disabled by the constant-false condition.
4776 if(false) // FIXME: LLVM does not generate optimal code
4778 return RValue<Int>(Nucleus::createExtractElement(val.value, Int::getType(), i));
// Bitcasts the value to a 2-element Int vector and extracts element 0.
4784 return RValue<Int>(Nucleus::createExtractElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), Int::getType(), 0));
// Moves the high element into the low position with UnpackHigh(val, val),
// then extracts element 0 of that through a recursive call.
4788 Int2 val2 = As<Int2>(UnpackHigh(val, val));
4790 return Extract(val2, 0);
// Returns a copy of val with element i replaced by `element`.
// The value is bitcast to a 2-element Int vector for the insert-element
// operation and bitcast back to Int2::getType() afterwards.
4795 RValue<Int2> Insert(RValue<Int2> val, RValue<Int> element, int i)
4797 return RValue<Int2>(Nucleus::createBitCast(Nucleus::createInsertElement(Nucleus::createBitCast(val.value, T(VectorType::get(Int::getType(), 2))), element.value, i), Int2::getType()));
// Returns the backend type used to represent an Int2: MMX::getType() when
// CPUID::supportsMMX2(); the other visible return is a 2-element vector of Int.
4800 Type *Int2::getType()
4802 if(CPUID::supportsMMX2())
4804 return MMX::getType();
4808 return T(VectorType::get(Int::getType(), 2));
4814 // xy.parent = this;
// Constructs a constant UInt2 with elements {x, y}.
// The constants are widened to int64_t for Nucleus::createConstantVector, built
// as a 2-element UInt vector, then bitcast to UInt2's own type before storing.
4817 UInt2::UInt2(unsigned int x, unsigned int y)
4819 // xy.parent = this;
4821 int64_t constantVector[2] = {x, y};
4822 Value *vector = V(Nucleus::createConstantVector(constantVector, T(VectorType::get(UInt::getType(), 2))));
4824 storeValue(Nucleus::createBitCast(vector, getType()));
// Constructs a UInt2 from an rvalue by storing its underlying value.
4827 UInt2::UInt2(RValue<UInt2> rhs)
4829 // xy.parent = this;
4831 storeValue(rhs.value);
// Copy constructor: the visible body loads the value held by rhs.
// NOTE(review): the statement that consumes `value` is not visible in this listing.
4834 UInt2::UInt2(const UInt2 &rhs)
4836 // xy.parent = this;
4838 Value *value = rhs.loadValue();
// Constructs a UInt2 from a Reference<UInt2>: the visible body loads through the reference.
// NOTE(review): the statement that consumes `value` is not visible in this listing.
4842 UInt2::UInt2(const Reference<UInt2> &rhs)
4844 // xy.parent = this;
4846 Value *value = rhs.loadValue();
// Assignment from an rvalue: the visible body stores rhs.value into this variable.
// NOTE(review): the return statement is not visible in this listing.
4850 RValue<UInt2> UInt2::operator=(RValue<UInt2> rhs) const
4852 storeValue(rhs.value);
// Assignment from another UInt2: loads rhs and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
4857 RValue<UInt2> UInt2::operator=(const UInt2 &rhs) const
4859 Value *value = rhs.loadValue();
4862 return RValue<UInt2>(value);
// Assignment from a Reference<UInt2>: loads through the reference and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
4865 RValue<UInt2> UInt2::operator=(const Reference<UInt2> &rhs) const
4867 Value *value = rhs.loadValue();
4870 return RValue<UInt2>(value);
// Addition of two UInt2 values.
// With MMX2 support the operands are reinterpreted as Int2 so that x86::paddd
// can be used, and the result is reinterpreted back; the other visible return
// uses Nucleus::createAdd.
4873 RValue<UInt2> operator+(RValue<UInt2> lhs, RValue<UInt2> rhs)
4875 if(CPUID::supportsMMX2())
4877 return As<UInt2>(x86::paddd(As<Int2>(lhs), As<Int2>(rhs)));
4881 return RValue<UInt2>(Nucleus::createAdd(lhs.value, rhs.value));
// Subtraction of two UInt2 values; x86::psubd through Int2 with MMX2 support,
// Nucleus::createSub in the other visible return.
4885 RValue<UInt2> operator-(RValue<UInt2> lhs, RValue<UInt2> rhs)
4887 if(CPUID::supportsMMX2())
4889 return As<UInt2>(x86::psubd(As<Int2>(lhs), As<Int2>(rhs)));
4893 return RValue<UInt2>(Nucleus::createSub(lhs.value, rhs.value));
4897 // RValue<UInt2> operator*(RValue<UInt2> lhs, RValue<UInt2> rhs)
4899 // return RValue<UInt2>(Nucleus::createMul(lhs.value, rhs.value));
4902 // RValue<UInt2> operator/(RValue<UInt2> lhs, RValue<UInt2> rhs)
4904 // return RValue<UInt2>(Nucleus::createUDiv(lhs.value, rhs.value));
4907 // RValue<UInt2> operator%(RValue<UInt2> lhs, RValue<UInt2> rhs)
4909 // return RValue<UInt2>(Nucleus::createURem(lhs.value, rhs.value));
// Bitwise AND of two UInt2 values.
// With MMX2 support both operands are reinterpreted as Short4 for x86::pand and
// the result is reinterpreted back to UInt2; the other visible return uses
// Nucleus::createAnd.
4912 RValue<UInt2> operator&(RValue<UInt2> lhs, RValue<UInt2> rhs)
4914 if(CPUID::supportsMMX2())
4916 return As<UInt2>(x86::pand(As<Short4>(lhs), As<Short4>(rhs)));
4920 return RValue<UInt2>(Nucleus::createAnd(lhs.value, rhs.value));
// Bitwise OR of two UInt2 values; x86::por through Short4 with MMX2 support,
// Nucleus::createOr in the other visible return.
4924 RValue<UInt2> operator|(RValue<UInt2> lhs, RValue<UInt2> rhs)
4926 if(CPUID::supportsMMX2())
4928 return As<UInt2>(x86::por(As<Short4>(lhs), As<Short4>(rhs)));
4932 return RValue<UInt2>(Nucleus::createOr(lhs.value, rhs.value));
// Bitwise XOR of two UInt2 values; x86::pxor through Short4 with MMX2 support,
// Nucleus::createXor in the other visible return.
4936 RValue<UInt2> operator^(RValue<UInt2> lhs, RValue<UInt2> rhs)
4938 if(CPUID::supportsMMX2())
4940 return As<UInt2>(x86::pxor(As<Short4>(lhs), As<Short4>(rhs)));
4944 return RValue<UInt2>(Nucleus::createXor(lhs.value, rhs.value));
// Left shift of a UInt2 by a C++ shift count; reuses the Int2 x86::pslld helper
// by reinterpreting the operand as Int2 and the result back as UInt2.
4948 RValue<UInt2> operator<<(RValue<UInt2> lhs, unsigned char rhs)
4950 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4952 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
// Right shift of a UInt2 by a C++ shift count, via x86::psrld
// (the commented-out generic form is a logical shift, createLShr).
4955 RValue<UInt2> operator>>(RValue<UInt2> lhs, unsigned char rhs)
4957 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4959 return x86::psrld(lhs, rhs);
// Left shift of a UInt2 by a shift count held in a Long1 value.
4962 RValue<UInt2> operator<<(RValue<UInt2> lhs, RValue<Long1> rhs)
4964 // return RValue<UInt2>(Nucleus::createShl(lhs.value, rhs.value));
4966 return As<UInt2>(x86::pslld(As<Int2>(lhs), rhs));
// Right shift of a UInt2 by a shift count held in a Long1 value, via x86::psrld.
4969 RValue<UInt2> operator>>(RValue<UInt2> lhs, RValue<Long1> rhs)
4971 // return RValue<UInt2>(Nucleus::createLShr(lhs.value, rhs.value));
4973 return x86::psrld(lhs, rhs);
// Compound assignment operators for UInt2.
// Each one evaluates the corresponding binary operator on (lhs, rhs), assigns the
// result back to lhs and returns the value of that assignment. lhs is taken by
// const reference; assignment works because UInt2::operator= is a const member.
// The *=, /= and %= forms are commented out.
4976 RValue<UInt2> operator+=(const UInt2 &lhs, RValue<UInt2> rhs)
4978 return lhs = lhs + rhs;
4981 RValue<UInt2> operator-=(const UInt2 &lhs, RValue<UInt2> rhs)
4983 return lhs = lhs - rhs;
4986 // RValue<UInt2> operator*=(const UInt2 &lhs, RValue<UInt2> rhs)
4988 // return lhs = lhs * rhs;
4991 // RValue<UInt2> operator/=(const UInt2 &lhs, RValue<UInt2> rhs)
4993 // return lhs = lhs / rhs;
4996 // RValue<UInt2> operator%=(const UInt2 &lhs, RValue<UInt2> rhs)
4998 // return lhs = lhs % rhs;
5001 RValue<UInt2> operator&=(const UInt2 &lhs, RValue<UInt2> rhs)
5003 return lhs = lhs & rhs;
5006 RValue<UInt2> operator|=(const UInt2 &lhs, RValue<UInt2> rhs)
5008 return lhs = lhs | rhs;
5011 RValue<UInt2> operator^=(const UInt2 &lhs, RValue<UInt2> rhs)
5013 return lhs = lhs ^ rhs;
5016 RValue<UInt2> operator<<=(const UInt2 &lhs, unsigned char rhs)
5018 return lhs = lhs << rhs;
5021 RValue<UInt2> operator>>=(const UInt2 &lhs, unsigned char rhs)
5023 return lhs = lhs >> rhs;
5026 RValue<UInt2> operator<<=(const UInt2 &lhs, RValue<Long1> rhs)
5028 return lhs = lhs << rhs;
5031 RValue<UInt2> operator>>=(const UInt2 &lhs, RValue<Long1> rhs)
5033 return lhs = lhs >> rhs;
5036 // RValue<UInt2> operator+(RValue<UInt2> val)
5041 // RValue<UInt2> operator-(RValue<UInt2> val)
5043 // return RValue<UInt2>(Nucleus::createNeg(val.value));
// Bitwise complement of a UInt2.
// With MMX2 support it is expressed as XOR with an all-ones UInt2 constant;
// the other visible return uses Nucleus::createNot.
5046 RValue<UInt2> operator~(RValue<UInt2> val)
5048 if(CPUID::supportsMMX2())
5050 return val ^ UInt2(0xFFFFFFFF, 0xFFFFFFFF);
5054 return RValue<UInt2>(Nucleus::createNot(val.value));
// Returns the backend type used to represent a UInt2: MMX::getType() when
// CPUID::supportsMMX2(); the other visible return is a 2-element vector of UInt.
5058 Type *UInt2::getType()
5060 if(CPUID::supportsMMX2())
5062 return MMX::getType();
5066 return T(VectorType::get(UInt::getType(), 2));
// Constructs an Int4 by zero-extending the four bytes of a Byte4.
// NOTE(review): the declaration of `e`, the else/braces and the final store
// are not visible in this listing.
5070 Int4::Int4(RValue<Byte4> cast)
// Reinterpret the four bytes as one Int and place it in element 0 of an
// otherwise undefined Int4.
5072 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5073 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
5077 if (CPUID::supportsSSE4_1())
5079 e = x86::pmovzxbd(RValue<Int4>(a)).value;
// Other path: interleave the bytes with zero bytes (indices 16..23 select from
// the null vector), widening each byte to a 16-bit value...
5083 int swizzle[16] = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
5084 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5085 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Byte16::getType())), swizzle);
// ...then interleave the 16-bit values with zero shorts (indices 8..11 select
// from the null vector), widening each to 32 bits.
5087 int swizzle2[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5088 Value *d = Nucleus::createBitCast(c, Short8::getType());
5089 e = Nucleus::createShuffleVector(d, V(Nucleus::createNullValue(Short8::getType())), swizzle2);
5092 Value *f = Nucleus::createBitCast(e, Int4::getType());
// Constructs an Int4 by sign-extending the four bytes of an SByte4.
// NOTE(review): the declaration of `g`, the else/braces and the final store
// are not visible in this listing.
5096 Int4::Int4(RValue<SByte4> cast)
// Reinterpret the four bytes as one Int and place it in element 0 of an
// otherwise undefined Int4.
5098 Value *x = Nucleus::createBitCast(cast.value, Int::getType());
5099 Value *a = Nucleus::createInsertElement(V(UndefValue::get(Int4::getType())), x, 0);
5103 if (CPUID::supportsSSE4_1())
5105 g = x86::pmovsxbd(RValue<Int4>(a)).value;
// Other path: duplicate every byte, then duplicate every resulting short, so
// each 32-bit element consists of four copies of one source byte.
5109 int swizzle[16] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
5110 Value *b = Nucleus::createBitCast(a, Byte16::getType());
5111 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5113 int swizzle2[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5114 Value *d = Nucleus::createBitCast(c, Short8::getType());
5115 Value *e = Nucleus::createShuffleVector(d, d, swizzle2);
// Shifting each element right by 24 with psrad leaves the top byte's value
// in the low bits of the element.
5117 Value *f = Nucleus::createBitCast(e, Int4::getType());
5118 // g = Nucleus::createAShr(f, Nucleus::createConstantInt(24));
5119 g = x86::psrad(RValue<Int4>(f), 24).value;
// Constructs an Int4 from a Float4 using a floating-point to signed-integer
// conversion (Nucleus::createFPToSI) into Int4's type.
// NOTE(review): the statement that stores `xyzw` is not visible in this listing.
5125 Int4::Int4(RValue<Float4> cast)
5127 // xyzw.parent = this;
5129 Value *xyzw = Nucleus::createFPToSI(cast.value, Int4::getType());
// Constructs an Int4 by sign-extending the four 16-bit elements of a Short4.
// NOTE(review): the else/braces and the statement performing the final shift
// and store are not visible in this listing.
5134 Int4::Int4(RValue<Short4> cast)
// Reinterpret the Short4 as a single Long, place it in element 0 of an undefined
// 2-element Long vector, and view that vector as an Int4.
5136 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5137 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5138 long2 = Nucleus::createInsertElement(long2, element, 0);
5139 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5141 if(CPUID::supportsSSE4_1())
5143 storeValue(x86::pmovsxwd(vector).value);
// Other path: duplicate each short so every 32-bit element holds two copies.
5147 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5149 int swizzle[8] = {0, 0, 1, 1, 2, 2, 3, 3};
5150 Value *c = Nucleus::createShuffleVector(b, b, swizzle);
5151 Value *d = Nucleus::createBitCast(c, Int4::getType());
5154 // Each Short is packed into each Int in the (Short | Short) format.
5155 // Shifting by 16 will retrieve the original Short value.
5156 // Shifting an Int will propagate the sign bit, which will work
5157 // for both positive and negative values of a Short.
// Constructs an Int4 by zero-extending the four 16-bit elements of a UShort4.
// NOTE(review): the else/braces and the statement that stores `d` are not
// visible in this listing.
5162 Int4::Int4(RValue<UShort4> cast)
// Reinterpret the UShort4 as a single Long, place it in element 0 of an undefined
// 2-element Long vector, and view that vector as an Int4.
5164 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5165 Value *element = Nucleus::createBitCast(cast.value, Long::getType());
5166 long2 = Nucleus::createInsertElement(long2, element, 0);
5167 RValue<Int4> vector = RValue<Int4>(Nucleus::createBitCast(long2, Int4::getType()));
5169 if(CPUID::supportsSSE4_1())
5171 storeValue(x86::pmovzxwd(RValue<Int4>(vector)).value);
// Other path: interleave each short with a zero short (indices 8..11 select
// from the null vector), widening it to 32 bits.
5175 Value *b = Nucleus::createBitCast(vector.value, Short8::getType());
5177 int swizzle[8] = {0, 8, 1, 9, 2, 10, 3, 11};
5178 Value *c = Nucleus::createShuffleVector(b, V(Nucleus::createNullValue(Short8::getType())), swizzle);
5179 Value *d = Nucleus::createBitCast(c, Int4::getType());
5186 // xyzw.parent = this;
// Constant constructors for Int4. Each forwards to constant(); when fewer than
// four values are given, the last argument is repeated for the remaining elements.
// All four elements equal xyzw.
5189 Int4::Int4(int xyzw)
5191 constant(xyzw, xyzw, xyzw, xyzw);
// Element 0 is x; elements 1..3 are yzw.
5194 Int4::Int4(int x, int yzw)
5196 constant(x, yzw, yzw, yzw);
// Elements 0 and 1 are x and y; elements 2 and 3 are zw.
5199 Int4::Int4(int x, int y, int zw)
5201 constant(x, y, zw, zw);
// Elements are x, y, z, w in that order.
5204 Int4::Int4(int x, int y, int z, int w)
5206 constant(x, y, z, w);
// Stores the constant vector {x, y, z, w} into this Int4.
// The values are widened to int64_t because that is the element type of the
// array handed to Nucleus::createConstantVector.
5209 void Int4::constant(int x, int y, int z, int w)
5211 // xyzw.parent = this;
5213 int64_t constantVector[4] = {x, y, z, w};
5214 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Copy/conversion constructors for Int4.
// NOTE(review): for the overloads that call rhs.loadValue(), the statement that
// consumes `value` is not visible in this listing.
// From an Int4 rvalue: stores the underlying value.
5217 Int4::Int4(RValue<Int4> rhs)
5219 // xyzw.parent = this;
5221 storeValue(rhs.value);
// From another Int4 variable.
5224 Int4::Int4(const Int4 &rhs)
5226 // xyzw.parent = this;
5228 Value *value = rhs.loadValue();
// From a Reference<Int4>.
5232 Int4::Int4(const Reference<Int4> &rhs)
5234 // xyzw.parent = this;
5236 Value *value = rhs.loadValue();
// From a UInt4 rvalue: the underlying value is stored as-is, with no conversion call.
5240 Int4::Int4(RValue<UInt4> rhs)
5242 // xyzw.parent = this;
5244 storeValue(rhs.value);
// From a UInt4 variable.
5247 Int4::Int4(const UInt4 &rhs)
5249 // xyzw.parent = this;
5251 Value *value = rhs.loadValue();
// From a Reference<UInt4>.
5255 Int4::Int4(const Reference<UInt4> &rhs)
5257 // xyzw.parent = this;
5259 Value *value = rhs.loadValue();
// Constructs an Int4 from two Int2 halves: lo goes into the first 64 bits and
// hi into the second.
// NOTE(review): the statement that stores `int4` is not visible in this listing.
5263 Int4::Int4(RValue<Int2> lo, RValue<Int2> hi)
5265 // xyzw.parent = this;
// Treat each half as one Long.
5267 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5268 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
// Assemble a 2-element Long vector {lo, hi} and view it as an Int4.
5270 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5271 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5272 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
5273 Value *int4 = Nucleus::createBitCast(long2, Int4::getType());
// Constructs an Int4 with all four elements equal to the scalar rhs.
5278 Int4::Int4(RValue<Int> rhs)
5280 // xyzw.parent = this;
// Load this variable's current contents only to have a vector to insert into;
// rhs is written into element 0.
5282 Value *vector = loadValue();
5283 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
// Shuffle indices {0, 0, 0, 0} replicate element 0 across all elements.
5285 int swizzle[4] = {0, 0, 0, 0};
5286 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
5288 storeValue(replicate);
// Constructs an Int4 from an Int variable by loading it and assigning the
// resulting RValue<Int> to *this.
5291 Int4::Int4(const Int &rhs)
5293 // xyzw.parent = this;
5295 *this = RValue<Int>(rhs.loadValue());
// Constructs an Int4 from a Reference<Int>, in the same way as for an Int variable.
5298 Int4::Int4(const Reference<Int> &rhs)
5300 // xyzw.parent = this;
5302 *this = RValue<Int>(rhs.loadValue());
// Assignment from an rvalue: the visible body stores rhs.value into this variable.
// NOTE(review): the return statement is not visible in this listing.
5305 RValue<Int4> Int4::operator=(RValue<Int4> rhs) const
5307 storeValue(rhs.value);
// Assignment from another Int4: loads rhs and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5312 RValue<Int4> Int4::operator=(const Int4 &rhs) const
5314 Value *value = rhs.loadValue();
5317 return RValue<Int4>(value);
// Assignment from a Reference<Int4>: loads through the reference and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5320 RValue<Int4> Int4::operator=(const Reference<Int4> &rhs) const
5322 Value *value = rhs.loadValue();
5325 return RValue<Int4>(value);
// Binary arithmetic and bitwise operators for Int4. Each wraps the matching
// Nucleus instruction builder applied to the operands' underlying values.
// Division and remainder use the signed forms (createSDiv / createSRem).
5328 RValue<Int4> operator+(RValue<Int4> lhs, RValue<Int4> rhs)
5330 return RValue<Int4>(Nucleus::createAdd(lhs.value, rhs.value));
5333 RValue<Int4> operator-(RValue<Int4> lhs, RValue<Int4> rhs)
5335 return RValue<Int4>(Nucleus::createSub(lhs.value, rhs.value));
5338 RValue<Int4> operator*(RValue<Int4> lhs, RValue<Int4> rhs)
5340 return RValue<Int4>(Nucleus::createMul(lhs.value, rhs.value));
5343 RValue<Int4> operator/(RValue<Int4> lhs, RValue<Int4> rhs)
5345 return RValue<Int4>(Nucleus::createSDiv(lhs.value, rhs.value));
5348 RValue<Int4> operator%(RValue<Int4> lhs, RValue<Int4> rhs)
5350 return RValue<Int4>(Nucleus::createSRem(lhs.value, rhs.value));
5353 RValue<Int4> operator&(RValue<Int4> lhs, RValue<Int4> rhs)
5355 return RValue<Int4>(Nucleus::createAnd(lhs.value, rhs.value));
5358 RValue<Int4> operator|(RValue<Int4> lhs, RValue<Int4> rhs)
5360 return RValue<Int4>(Nucleus::createOr(lhs.value, rhs.value));
5363 RValue<Int4> operator^(RValue<Int4> lhs, RValue<Int4> rhs)
5365 return RValue<Int4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift operators for Int4.
// Shifts by a C++ shift count go through the x86 helpers (pslld / psrad).
5368 RValue<Int4> operator<<(RValue<Int4> lhs, unsigned char rhs)
5370 return x86::pslld(lhs, rhs);
5373 RValue<Int4> operator>>(RValue<Int4> lhs, unsigned char rhs)
5375 return x86::psrad(lhs, rhs);
// Shifts by an Int4 of counts use the generic builders; the right shift is
// arithmetic (createAShr).
5378 RValue<Int4> operator<<(RValue<Int4> lhs, RValue<Int4> rhs)
5380 return RValue<Int4>(Nucleus::createShl(lhs.value, rhs.value));
5383 RValue<Int4> operator>>(RValue<Int4> lhs, RValue<Int4> rhs)
5385 return RValue<Int4>(Nucleus::createAShr(lhs.value, rhs.value));
// Compound assignment operators for Int4.
// Each one evaluates the corresponding binary operator on (lhs, rhs), assigns the
// result back to lhs and returns the value of that assignment. lhs is taken by
// const reference; assignment works because Int4::operator= is a const member.
// The /= and %= forms are commented out.
5388 RValue<Int4> operator+=(const Int4 &lhs, RValue<Int4> rhs)
5390 return lhs = lhs + rhs;
5393 RValue<Int4> operator-=(const Int4 &lhs, RValue<Int4> rhs)
5395 return lhs = lhs - rhs;
5398 RValue<Int4> operator*=(const Int4 &lhs, RValue<Int4> rhs)
5400 return lhs = lhs * rhs;
5403 // RValue<Int4> operator/=(const Int4 &lhs, RValue<Int4> rhs)
5405 // return lhs = lhs / rhs;
5408 // RValue<Int4> operator%=(const Int4 &lhs, RValue<Int4> rhs)
5410 // return lhs = lhs % rhs;
5413 RValue<Int4> operator&=(const Int4 &lhs, RValue<Int4> rhs)
5415 return lhs = lhs & rhs;
5418 RValue<Int4> operator|=(const Int4 &lhs, RValue<Int4> rhs)
5420 return lhs = lhs | rhs;
5423 RValue<Int4> operator^=(const Int4 &lhs, RValue<Int4> rhs)
5425 return lhs = lhs ^ rhs;
5428 RValue<Int4> operator<<=(const Int4 &lhs, unsigned char rhs)
5430 return lhs = lhs << rhs;
5433 RValue<Int4> operator>>=(const Int4 &lhs, unsigned char rhs)
5435 return lhs = lhs >> rhs;
// Unary plus for Int4.
// NOTE(review): the body is not visible in this listing.
5438 RValue<Int4> operator+(RValue<Int4> val)
// Unary minus for Int4, via Nucleus::createNeg.
5443 RValue<Int4> operator-(RValue<Int4> val)
5445 return RValue<Int4>(Nucleus::createNeg(val.value));
// Bitwise complement for Int4, via Nucleus::createNot.
5448 RValue<Int4> operator~(RValue<Int4> val)
5450 return RValue<Int4>(Nucleus::createNot(val.value));
// Comparisons on Int4 using signed integer predicates. Each result is the
// comparison sign-extended to Int4's type, i.e. a mask vector.
// CmpEQ, CmpLE and CmpNLT work around the LLVM issue described in their FIXME
// by computing the opposite predicate and XORing the mask with all ones.
// Equal: complement of the not-equal mask.
5453 RValue<Int4> CmpEQ(RValue<Int4> x, RValue<Int4> y)
5455 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5456 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5457 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5458 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Less than (signed).
5461 RValue<Int4> CmpLT(RValue<Int4> x, RValue<Int4> y)
5463 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType()));
// Less than or equal: complement of the signed greater-than mask.
5466 RValue<Int4> CmpLE(RValue<Int4> x, RValue<Int4> y)
5468 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5469 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5470 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLE(x.value, y.value), Int4::getType()));
5471 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Not equal.
5474 RValue<Int4> CmpNEQ(RValue<Int4> x, RValue<Int4> y)
5476 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// Not less than (greater than or equal): complement of the signed less-than mask.
5479 RValue<Int4> CmpNLT(RValue<Int4> x, RValue<Int4> y)
5481 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5482 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5483 // return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGE(x.value, y.value), Int4::getType()));
5484 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSLT(x.value, y.value), Int4::getType())) ^ Int4(0xFFFFFFFF);
// Not less than or equal (greater than, signed).
5487 RValue<Int4> CmpNLE(RValue<Int4> x, RValue<Int4> y)
5489 return RValue<Int4>(Nucleus::createSExt(Nucleus::createICmpSGT(x.value, y.value), Int4::getType()));
5492 RValue<Int4> Max(RValue<Int4> x, RValue<Int4> y)
5494 if(CPUID::supportsSSE4_1())
5496 return x86::pmaxsd(x, y);
5500 RValue<Int4> greater = CmpNLE(x, y);
5501 return x & greater | y & ~greater;
5505 RValue<Int4> Min(RValue<Int4> x, RValue<Int4> y)
5507 if(CPUID::supportsSSE4_1())
5509 return x86::pminsd(x, y);
5513 RValue<Int4> less = CmpLT(x, y);
5514 return x & less | y & ~less;
// Converts a Float4 to Int4 via x86::cvtps2dq.
5518 RValue<Int4> RoundInt(RValue<Float4> cast)
5520 return x86::cvtps2dq(cast);
// Packs two Int4 vectors into one Short8 via x86::packssdw.
5523 RValue<Short8> Pack(RValue<Int4> x, RValue<Int4> y)
5525 return x86::packssdw(x, y);
// Extracts element i of x as a scalar Int.
5528 RValue<Int> Extract(RValue<Int4> x, int i)
5530 return RValue<Int>(Nucleus::createExtractElement(x.value, Int::getType(), i));
// Returns a copy of x with element i replaced by `element`.
5533 RValue<Int4> Insert(RValue<Int4> x, RValue<Int> element, int i)
5535 return RValue<Int4>(Nucleus::createInsertElement(x.value, element.value, i));
// Returns x86::movmskps applied to x reinterpreted as Float4.
5538 RValue<Int> SignMask(RValue<Int4> x)
5540 return x86::movmskps(As<Float4>(x));
// Rearranges the elements of x as described by `select`, using createSwizzle4.
5543 RValue<Int4> Swizzle(RValue<Int4> x, unsigned char select)
5545 return RValue<Int4>(createSwizzle4(x.value, select));
// Returns the backend type used to represent an Int4: a 4-element vector of Int.
5548 Type *Int4::getType()
5550 return T(VectorType::get(Int::getType(), 4));
// Constructs a UInt4 from a Float4 using only signed conversions.
// Values below 2^31 are converted directly; values at or above 2^31 are
// converted after subtracting 2^31, which is re-added as an integer afterwards.
// Elements whose float has the sign bit set are stored as 0.
5553 UInt4::UInt4(RValue<Float4> cast)
5555 // xyzw.parent = this;
5557 // Note: createFPToUI is broken, must perform conversion using createFPtoSI
5558 // Value *xyzw = Nucleus::createFPToUI(cast.value, UInt4::getType());
5560 // Smallest positive value representable in UInt, but not in Int
5561 const unsigned int ustart = 0x80000000u;
5562 const float ustartf = float(ustart);
5564 // Check if the value can be represented as an Int
5565 Int4 uiValue = CmpNLT(cast, Float4(ustartf));
5566 // If the value is too large, subtract ustart and re-add it after conversion.
5567 uiValue = (uiValue & As<Int4>(As<UInt4>(Int4(cast - Float4(ustartf))) + UInt4(ustart))) |
5568 // Otherwise, just convert normally
5569 (~uiValue & Int4(cast));
5570 // If the value is negative, store 0, otherwise store the result of the conversion
5571 storeValue((~(As<Int4>(cast) >> 31) & uiValue).value);
5576 // xyzw.parent = this;
// Constant constructors for UInt4. Each forwards to constant(); when fewer than
// four values are given, the last argument is repeated for the remaining elements.
// All four elements equal xyzw.
5579 UInt4::UInt4(int xyzw)
5581 constant(xyzw, xyzw, xyzw, xyzw);
// Element 0 is x; elements 1..3 are yzw.
5584 UInt4::UInt4(int x, int yzw)
5586 constant(x, yzw, yzw, yzw);
// Elements 0 and 1 are x and y; elements 2 and 3 are zw.
5589 UInt4::UInt4(int x, int y, int zw)
5591 constant(x, y, zw, zw);
// Elements are x, y, z, w in that order.
5594 UInt4::UInt4(int x, int y, int z, int w)
5596 constant(x, y, z, w);
// Stores the constant vector {x, y, z, w} into this UInt4.
// The parameters are signed ints and are widened to int64_t for
// Nucleus::createConstantVector.
// NOTE(review): negative arguments are sign-extended into the int64_t array;
// presumably the builder truncates to the 32-bit element type — confirm.
5599 void UInt4::constant(int x, int y, int z, int w)
5601 // xyzw.parent = this;
5603 int64_t constantVector[4] = {x, y, z, w};
5604 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Copy/conversion constructors for UInt4.
// NOTE(review): for the overloads that call rhs.loadValue(), the statement that
// consumes `value` is not visible in this listing.
// From a UInt4 rvalue: stores the underlying value.
5607 UInt4::UInt4(RValue<UInt4> rhs)
5609 // xyzw.parent = this;
5611 storeValue(rhs.value);
// From another UInt4 variable.
5614 UInt4::UInt4(const UInt4 &rhs)
5616 // xyzw.parent = this;
5618 Value *value = rhs.loadValue();
// From a Reference<UInt4>.
5622 UInt4::UInt4(const Reference<UInt4> &rhs)
5624 // xyzw.parent = this;
5626 Value *value = rhs.loadValue();
// From an Int4 rvalue: the underlying value is stored as-is, with no conversion call.
5630 UInt4::UInt4(RValue<Int4> rhs)
5632 // xyzw.parent = this;
5634 storeValue(rhs.value);
// From an Int4 variable.
5637 UInt4::UInt4(const Int4 &rhs)
5639 // xyzw.parent = this;
5641 Value *value = rhs.loadValue();
// From a Reference<Int4>.
5645 UInt4::UInt4(const Reference<Int4> &rhs)
5647 // xyzw.parent = this;
5649 Value *value = rhs.loadValue();
// Constructs a UInt4 from two UInt2 halves: lo goes into the first 64 bits and
// hi into the second.
// NOTE(review): the statement that stores `uint4` is not visible in this listing.
5653 UInt4::UInt4(RValue<UInt2> lo, RValue<UInt2> hi)
// Treat each half as one Long.
5655 Value *loLong = Nucleus::createBitCast(lo.value, Long::getType());
5656 Value *hiLong = Nucleus::createBitCast(hi.value, Long::getType());
// Assemble a 2-element Long vector {lo, hi} and view it as a 4-element vector.
5658 Value *long2 = V(UndefValue::get(VectorType::get(Long::getType(), 2)));
5659 long2 = Nucleus::createInsertElement(long2, loLong, 0);
5660 long2 = Nucleus::createInsertElement(long2, hiLong, 1);
// NOTE(review): the bitcast targets Int4::getType() rather than UInt4::getType();
// presumably both map to the same backend vector type — confirm, and consider
// using UInt4::getType() for consistency.
5661 Value *uint4 = Nucleus::createBitCast(long2, Int4::getType());
// Assignment from an rvalue: the visible body stores rhs.value into this variable.
// NOTE(review): the return statement is not visible in this listing.
5666 RValue<UInt4> UInt4::operator=(RValue<UInt4> rhs) const
5668 storeValue(rhs.value);
// Assignment from another UInt4: loads rhs and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5673 RValue<UInt4> UInt4::operator=(const UInt4 &rhs) const
5675 Value *value = rhs.loadValue();
5678 return RValue<UInt4>(value);
// Assignment from a Reference<UInt4>: loads through the reference and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5681 RValue<UInt4> UInt4::operator=(const Reference<UInt4> &rhs) const
5683 Value *value = rhs.loadValue();
5686 return RValue<UInt4>(value);
// Binary arithmetic and bitwise operators for UInt4. Each wraps the matching
// Nucleus instruction builder applied to the operands' underlying values.
// Division and remainder use the unsigned forms (createUDiv / createURem).
5689 RValue<UInt4> operator+(RValue<UInt4> lhs, RValue<UInt4> rhs)
5691 return RValue<UInt4>(Nucleus::createAdd(lhs.value, rhs.value));
5694 RValue<UInt4> operator-(RValue<UInt4> lhs, RValue<UInt4> rhs)
5696 return RValue<UInt4>(Nucleus::createSub(lhs.value, rhs.value));
5699 RValue<UInt4> operator*(RValue<UInt4> lhs, RValue<UInt4> rhs)
5701 return RValue<UInt4>(Nucleus::createMul(lhs.value, rhs.value));
5704 RValue<UInt4> operator/(RValue<UInt4> lhs, RValue<UInt4> rhs)
5706 return RValue<UInt4>(Nucleus::createUDiv(lhs.value, rhs.value));
5709 RValue<UInt4> operator%(RValue<UInt4> lhs, RValue<UInt4> rhs)
5711 return RValue<UInt4>(Nucleus::createURem(lhs.value, rhs.value));
5714 RValue<UInt4> operator&(RValue<UInt4> lhs, RValue<UInt4> rhs)
5716 return RValue<UInt4>(Nucleus::createAnd(lhs.value, rhs.value));
5719 RValue<UInt4> operator|(RValue<UInt4> lhs, RValue<UInt4> rhs)
5721 return RValue<UInt4>(Nucleus::createOr(lhs.value, rhs.value));
5724 RValue<UInt4> operator^(RValue<UInt4> lhs, RValue<UInt4> rhs)
5726 return RValue<UInt4>(Nucleus::createXor(lhs.value, rhs.value));
// Shift operators for UInt4.
// Left shift by a C++ count reuses the Int4 x86::pslld helper through As<> casts.
5729 RValue<UInt4> operator<<(RValue<UInt4> lhs, unsigned char rhs)
5731 return As<UInt4>(x86::pslld(As<Int4>(lhs), rhs));
// Right shift by a C++ count uses x86::psrld.
5734 RValue<UInt4> operator>>(RValue<UInt4> lhs, unsigned char rhs)
5736 return x86::psrld(lhs, rhs);
// Shifts by a UInt4 of counts use the generic builders; the right shift is
// logical (createLShr).
5739 RValue<UInt4> operator<<(RValue<UInt4> lhs, RValue<UInt4> rhs)
5741 return RValue<UInt4>(Nucleus::createShl(lhs.value, rhs.value));
5744 RValue<UInt4> operator>>(RValue<UInt4> lhs, RValue<UInt4> rhs)
5746 return RValue<UInt4>(Nucleus::createLShr(lhs.value, rhs.value));
// Compound assignment operators for UInt4.
// Each one evaluates the corresponding binary operator on (lhs, rhs), assigns the
// result back to lhs and returns the value of that assignment. lhs is taken by
// const reference; assignment works because UInt4::operator= is a const member.
// The /= and %= forms are commented out.
5749 RValue<UInt4> operator+=(const UInt4 &lhs, RValue<UInt4> rhs)
5751 return lhs = lhs + rhs;
5754 RValue<UInt4> operator-=(const UInt4 &lhs, RValue<UInt4> rhs)
5756 return lhs = lhs - rhs;
5759 RValue<UInt4> operator*=(const UInt4 &lhs, RValue<UInt4> rhs)
5761 return lhs = lhs * rhs;
5764 // RValue<UInt4> operator/=(const UInt4 &lhs, RValue<UInt4> rhs)
5766 // return lhs = lhs / rhs;
5769 // RValue<UInt4> operator%=(const UInt4 &lhs, RValue<UInt4> rhs)
5771 // return lhs = lhs % rhs;
5774 RValue<UInt4> operator&=(const UInt4 &lhs, RValue<UInt4> rhs)
5776 return lhs = lhs & rhs;
5779 RValue<UInt4> operator|=(const UInt4 &lhs, RValue<UInt4> rhs)
5781 return lhs = lhs | rhs;
5784 RValue<UInt4> operator^=(const UInt4 &lhs, RValue<UInt4> rhs)
5786 return lhs = lhs ^ rhs;
5789 RValue<UInt4> operator<<=(const UInt4 &lhs, unsigned char rhs)
5791 return lhs = lhs << rhs;
5794 RValue<UInt4> operator>>=(const UInt4 &lhs, unsigned char rhs)
5796 return lhs = lhs >> rhs;
// Unary plus for UInt4.
// NOTE(review): the body is not visible in this listing.
5799 RValue<UInt4> operator+(RValue<UInt4> val)
// Unary minus for UInt4, via Nucleus::createNeg.
5804 RValue<UInt4> operator-(RValue<UInt4> val)
5806 return RValue<UInt4>(Nucleus::createNeg(val.value));
// Bitwise complement for UInt4, via Nucleus::createNot.
5809 RValue<UInt4> operator~(RValue<UInt4> val)
5811 return RValue<UInt4>(Nucleus::createNot(val.value));
// Comparisons on UInt4 using unsigned integer predicates. Each result is the
// comparison sign-extended to a 4 x 32-bit vector (Int4::getType() is used as
// the target type), i.e. a mask vector.
// CmpEQ, CmpLE and CmpNLT work around the LLVM issue described in their FIXME
// by computing the opposite predicate and XORing the mask with all ones.
// Equal: complement of the not-equal mask.
5814 RValue<UInt4> CmpEQ(RValue<UInt4> x, RValue<UInt4> y)
5816 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5817 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5818 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpEQ(x.value, y.value), Int4::getType()));
5819 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Less than (unsigned).
5822 RValue<UInt4> CmpLT(RValue<UInt4> x, RValue<UInt4> y)
5824 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType()));
// Less than or equal: complement of the unsigned greater-than mask.
5827 RValue<UInt4> CmpLE(RValue<UInt4> x, RValue<UInt4> y)
5829 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5830 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5831 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULE(x.value, y.value), Int4::getType()));
5832 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Not equal.
5835 RValue<UInt4> CmpNEQ(RValue<UInt4> x, RValue<UInt4> y)
5837 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpNE(x.value, y.value), Int4::getType()));
// Not less than (greater than or equal): complement of the unsigned less-than mask.
5840 RValue<UInt4> CmpNLT(RValue<UInt4> x, RValue<UInt4> y)
5842 // FIXME: An LLVM bug causes SExt(ICmpCC()) to produce 0 or 1 instead of 0 or ~0
5843 // Restore the following line when LLVM is updated to a version where this issue is fixed.
5844 // return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGE(x.value, y.value), Int4::getType()));
5845 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpULT(x.value, y.value), Int4::getType())) ^ UInt4(0xFFFFFFFF);
// Not less than or equal (greater than, unsigned).
5848 RValue<UInt4> CmpNLE(RValue<UInt4> x, RValue<UInt4> y)
5850 return RValue<UInt4>(Nucleus::createSExt(Nucleus::createICmpUGT(x.value, y.value), Int4::getType()));
5853 RValue<UInt4> Max(RValue<UInt4> x, RValue<UInt4> y)
5855 if(CPUID::supportsSSE4_1())
5857 return x86::pmaxud(x, y);
5861 RValue<UInt4> greater = CmpNLE(x, y);
5862 return x & greater | y & ~greater;
5866 RValue<UInt4> Min(RValue<UInt4> x, RValue<UInt4> y)
5868 if(CPUID::supportsSSE4_1())
5870 return x86::pminud(x, y);
5874 RValue<UInt4> less = CmpLT(x, y);
5875 return x & less | y & ~less;
// Packs two UInt4 vectors into one UShort8 via x86::packusdw.
// Unlike Max/Min there is no CPUID check here; the existing FIXME records that
// a fallback path is still needed.
5879 RValue<UShort8> Pack(RValue<UInt4> x, RValue<UInt4> y)
5881 return x86::packusdw(x, y); // FIXME: Fallback required
// Returns the backend type used to represent a UInt4: a 4-element vector of UInt.
5884 Type *UInt4::getType()
5886 return T(VectorType::get(UInt::getType(), 4));
// Constructs a Float from an Int using a signed integer to floating-point
// conversion (Nucleus::createSIToFP) and stores the result.
5889 Float::Float(RValue<Int> cast)
5891 Value *integer = Nucleus::createSIToFP(cast.value, Float::getType());
5893 storeValue(integer);
// Constructs a Float holding the constant x.
5901 Float::Float(float x)
5903 storeValue(Nucleus::createConstantFloat(x));
// Constructs a Float from an rvalue by storing its underlying value.
5906 Float::Float(RValue<Float> rhs)
5908 storeValue(rhs.value);
// Copy constructor: the visible body loads the value held by rhs.
// NOTE(review): the statement that consumes `value` is not visible in this listing.
5911 Float::Float(const Float &rhs)
5913 Value *value = rhs.loadValue();
// Constructs a Float from a Reference<Float>: the visible body loads through the reference.
// NOTE(review): the statement that consumes `value` is not visible in this listing.
5917 Float::Float(const Reference<Float> &rhs)
5919 Value *value = rhs.loadValue();
// Assignment from an rvalue: the visible body stores rhs.value into this variable.
// NOTE(review): the return statement is not visible in this listing.
5923 RValue<Float> Float::operator=(RValue<Float> rhs) const
5925 storeValue(rhs.value);
// Assignment from another Float: loads rhs and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5930 RValue<Float> Float::operator=(const Float &rhs) const
5932 Value *value = rhs.loadValue();
5935 return RValue<Float>(value);
// Assignment from a Reference<Float>: loads through the reference and returns the loaded value.
// NOTE(review): the store into this variable is not visible in this listing — confirm.
5938 RValue<Float> Float::operator=(const Reference<Float> &rhs) const
5940 Value *value = rhs.loadValue();
5943 return RValue<Float>(value);
// Binary arithmetic operators for Float. Each wraps the matching floating-point
// Nucleus builder (createFAdd / createFSub / createFMul / createFDiv).
5946 RValue<Float> operator+(RValue<Float> lhs, RValue<Float> rhs)
5948 return RValue<Float>(Nucleus::createFAdd(lhs.value, rhs.value));
5951 RValue<Float> operator-(RValue<Float> lhs, RValue<Float> rhs)
5953 return RValue<Float>(Nucleus::createFSub(lhs.value, rhs.value));
5956 RValue<Float> operator*(RValue<Float> lhs, RValue<Float> rhs)
5958 return RValue<Float>(Nucleus::createFMul(lhs.value, rhs.value));
5961 RValue<Float> operator/(RValue<Float> lhs, RValue<Float> rhs)
5963 return RValue<Float>(Nucleus::createFDiv(lhs.value, rhs.value));
// Compound assignment operators for Float.
// Each one evaluates the corresponding binary operator on (lhs, rhs), assigns the
// result back to lhs and returns the value of that assignment. lhs is taken by
// const reference; assignment works because Float::operator= is a const member.
5966 RValue<Float> operator+=(const Float &lhs, RValue<Float> rhs)
5968 return lhs = lhs + rhs;
5971 RValue<Float> operator-=(const Float &lhs, RValue<Float> rhs)
5973 return lhs = lhs - rhs;
5976 RValue<Float> operator*=(const Float &lhs, RValue<Float> rhs)
5978 return lhs = lhs * rhs;
5981 RValue<Float> operator/=(const Float &lhs, RValue<Float> rhs)
5983 return lhs = lhs / rhs;
// Unary plus for Float.
// NOTE(review): the body is not visible in this listing.
5986 RValue<Float> operator+(RValue<Float> val)
// Unary minus for Float, via Nucleus::createFNeg.
5991 RValue<Float> operator-(RValue<Float> val)
5993 return RValue<Float>(Nucleus::createFNeg(val.value));
// Relational operators for Float, each producing a Bool.
// All of them use the "ordered" FCmpO* builders, including != (createFCmpONE).
// NOTE(review): with an ordered not-equal, a NaN operand presumably makes !=
// evaluate to false, unlike C++'s built-in operator!= — confirm this is intended.
5996 RValue<Bool> operator<(RValue<Float> lhs, RValue<Float> rhs)
5998 return RValue<Bool>(Nucleus::createFCmpOLT(lhs.value, rhs.value));
6001 RValue<Bool> operator<=(RValue<Float> lhs, RValue<Float> rhs)
6003 return RValue<Bool>(Nucleus::createFCmpOLE(lhs.value, rhs.value));
6006 RValue<Bool> operator>(RValue<Float> lhs, RValue<Float> rhs)
6008 return RValue<Bool>(Nucleus::createFCmpOGT(lhs.value, rhs.value));
6011 RValue<Bool> operator>=(RValue<Float> lhs, RValue<Float> rhs)
6013 return RValue<Bool>(Nucleus::createFCmpOGE(lhs.value, rhs.value));
6016 RValue<Bool> operator!=(RValue<Float> lhs, RValue<Float> rhs)
6018 return RValue<Bool>(Nucleus::createFCmpONE(lhs.value, rhs.value));
6021 RValue<Bool> operator==(RValue<Float> lhs, RValue<Float> rhs)
6023 return RValue<Bool>(Nucleus::createFCmpOEQ(lhs.value, rhs.value));
// Absolute value: selects x when x > 0.0f, otherwise -x.
// NOTE(review): for x == +0.0f the comparison is false, so the result is -x,
// i.e. presumably negative zero — confirm callers do not depend on the sign of zero.
6026 RValue<Float> Abs(RValue<Float> x)
6028 return IfThenElse(x > 0.0f, x, -x);
// Maximum: selects x when x > y, otherwise y.
6031 RValue<Float> Max(RValue<Float> x, RValue<Float> y)
6033 return IfThenElse(x > y, x, y);
// Minimum: selects x when x < y, otherwise y.
6036 RValue<Float> Min(RValue<Float> x, RValue<Float> y)
6038 return IfThenElse(x < y, x, y);
// Approximate reciprocal of x based on x86::rcpss.
// NOTE(review): the condition choosing between the two returns is not visible in
// this listing; presumably the corrected form is used when exactAtPow2 is true.
6041 RValue<Float> Rcp_pp(RValue<Float> x, bool exactAtPow2)
6045 // rcpss uses a piecewise-linear approximation which minimizes the relative error
6046 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
// The correction factor 1 / rcp(1.0f) is evaluated with host intrinsics when this
// code runs, and enters the generated code as a Float constant.
6047 return x86::rcpss(x) * Float(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6051 return x86::rcpss(x);
6055 RValue<Float> RcpSqrt_pp(RValue<Float> x)
6057 return x86::rsqrtss(x);
6060 RValue<Float> Sqrt(RValue<Float> x)
6062 return x86::sqrtss(x);
6065 RValue<Float> Round(RValue<Float> x)
6067 if(CPUID::supportsSSE4_1())
6069 return x86::roundss(x, 0);
6073 return Float4(Round(Float4(x))).x;
6077 RValue<Float> Trunc(RValue<Float> x)
6079 if(CPUID::supportsSSE4_1())
6081 return x86::roundss(x, 3);
6085 return Float(Int(x)); // Rounded toward zero
6089 RValue<Float> Frac(RValue<Float> x)
6091 if(CPUID::supportsSSE4_1())
6093 return x - x86::floorss(x);
6097 return Float4(Frac(Float4(x))).x;
6101 RValue<Float> Floor(RValue<Float> x)
6103 if(CPUID::supportsSSE4_1())
6105 return x86::floorss(x);
6109 return Float4(Floor(Float4(x))).x;
6113 RValue<Float> Ceil(RValue<Float> x)
6115 if(CPUID::supportsSSE4_1())
6117 return x86::ceilss(x);
6121 return Float4(Ceil(Float4(x))).x;
6125 Type *Float::getType()
6127 return T(llvm::Type::getFloatTy(*::context));
6130 Float2::Float2(RValue<Float4> cast)
6132 // xyzw.parent = this;
6134 Value *int64x2 = Nucleus::createBitCast(cast.value, T(VectorType::get(Long::getType(), 2)));
6135 Value *int64 = Nucleus::createExtractElement(int64x2, Long::getType(), 0);
6136 Value *float2 = Nucleus::createBitCast(int64, Float2::getType());
6141 Type *Float2::getType()
6143 return T(VectorType::get(Float::getType(), 2));
// Constructs a Float4 from four packed unsigned bytes.
// Three alternative lowerings follow, each producing a value named xyzw:
//   1. a direct vector UIToFP (annotated below as crashing),
//   2. a per-element extract / UIToFP / insert sequence,
//   3. widening through Int4 followed by a vector SIToFP.
// NOTE(review): the directives that select between these alternatives are not
// visible here; presumably only one of them is compiled - confirm.
6146 Float4::Float4(RValue<Byte4> cast)
// Alternative 1: convert the whole vector at once.
6151 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: convert lane by lane, inserting into this object's current value.
6153 Value *vector = loadValue();
6155 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6156 Value *f32x = Nucleus::createUIToFP(i8x, Float::getType());
6157 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6159 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6160 Value *f32y = Nucleus::createUIToFP(i8y, Float::getType());
6161 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6163 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6164 Value *f32z = Nucleus::createUIToFP(i8z, Float::getType());
6165 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6167 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6168 Value *f32w = Nucleus::createUIToFP(i8w, Float::getType());
6169 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: build an Int4 from the bytes, then convert that vector to float.
6171 Value *a = Int4(cast).loadValue();
6172 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Constructs a Float4 from four packed signed bytes.
// Three alternative lowerings follow, each producing a value named xyzw:
//   1. a direct vector SIToFP (annotated below as crashing),
//   2. a per-element extract / SIToFP / insert sequence,
//   3. widening through Int4 followed by a vector SIToFP.
// NOTE(review): the directives that select between these alternatives are not
// visible here; presumably only one of them is compiled - confirm.
6178 Float4::Float4(RValue<SByte4> cast)
// Alternative 1: convert the whole vector at once.
6183 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType()); // FIXME: Crashes
// Alternative 2: convert lane by lane, inserting into this object's current value.
6185 Value *vector = loadValue();
6187 Value *i8x = Nucleus::createExtractElement(cast.value, 0);
6188 Value *f32x = Nucleus::createSIToFP(i8x, Float::getType());
6189 Value *x = Nucleus::createInsertElement(vector, f32x, 0);
6191 Value *i8y = Nucleus::createExtractElement(cast.value, V(Nucleus::createConstantInt(1)));
6192 Value *f32y = Nucleus::createSIToFP(i8y, Float::getType());
6193 Value *xy = Nucleus::createInsertElement(x, f32y, V(Nucleus::createConstantInt(1)));
6195 Value *i8z = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(2));
6196 Value *f32z = Nucleus::createSIToFP(i8z, Float::getType());
6197 Value *xyz = Nucleus::createInsertElement(xy, f32z, Nucleus::createConstantInt(2));
6199 Value *i8w = Nucleus::createExtractElement(cast.value, Nucleus::createConstantInt(3));
6200 Value *f32w = Nucleus::createSIToFP(i8w, Float::getType());
6201 Value *xyzw = Nucleus::createInsertElement(xyz, f32w, Nucleus::createConstantInt(3));
// Alternative 3: build an Int4 from the bytes, then convert that vector to float.
6203 Value *a = Int4(cast).loadValue();
6204 Value *xyzw = Nucleus::createSIToFP(a, Float4::getType());
// Constructs a Float4 from four signed 16-bit values: an Int4 named c is converted
// with SIToFP and stored.
// NOTE(review): c is not declared in the visible lines; presumably an Int4 built
// from cast - confirm.
6210 Float4::Float4(RValue<Short4> cast)
6215 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Constructs a Float4 from four unsigned 16-bit values, using the same
// Int4 -> SIToFP path as the Short4 overload.
// NOTE(review): c is not declared in the visible lines - confirm it is an Int4
// built from cast.
6218 Float4::Float4(RValue<UShort4> cast)
6223 storeValue(Nucleus::createSIToFP(RValue<Int4>(c).value, Float4::getType()));
// Constructs a Float4 from an Int4 with a signed integer-to-float conversion.
6226 Float4::Float4(RValue<Int4> cast)
6230 Value *xyzw = Nucleus::createSIToFP(cast.value, Float4::getType());
// Constructs a Float4 from a UInt4 with an unsigned integer-to-float conversion.
6235 Float4::Float4(RValue<UInt4> cast)
6239 Value *xyzw = Nucleus::createUIToFP(cast.value, Float4::getType());
6249 Float4::Float4(float xyzw)
6251 constant(xyzw, xyzw, xyzw, xyzw);
6254 Float4::Float4(float x, float yzw)
6256 constant(x, yzw, yzw, yzw);
6259 Float4::Float4(float x, float y, float zw)
6261 constant(x, y, zw, zw);
6264 Float4::Float4(float x, float y, float z, float w)
6266 constant(x, y, z, w);
// Initializes this Float4 with the constant vector (x, y, z, w).
// The four floats are widened to double for Nucleus::createConstantVector, which
// builds a constant of this object's type; the result is stored into the variable.
6269 void Float4::constant(float x, float y, float z, float w)
6273 double constantVector[4] = {x, y, z, w};
6274 storeValue(Nucleus::createConstantVector(constantVector, getType()));
// Constructs a Float4 from an rvalue by storing its value.
6277 Float4::Float4(RValue<Float4> rhs)
6281 storeValue(rhs.value);
// Copy constructor: loads the current value of rhs.
// NOTE(review): the store of the loaded value is not visible here - confirm.
6284 Float4::Float4(const Float4 &rhs)
6288 Value *value = rhs.loadValue();
// Constructs a Float4 from a reference by loading the referenced value.
// NOTE(review): the store of the loaded value is not visible here - confirm.
6292 Float4::Float4(const Reference<Float4> &rhs)
6296 Value *value = rhs.loadValue();
// Constructs a Float4 with all four lanes equal to a scalar Float rvalue.
6300 Float4::Float4(RValue<Float> rhs)
// Insert the scalar into lane 0 of this object's current value...
6304 Value *vector = loadValue();
6305 Value *insert = Nucleus::createInsertElement(vector, rhs.value, 0);
// ...then shuffle with an all-zero selector so every lane takes lane 0.
6307 int swizzle[4] = {0, 0, 0, 0};
6308 Value *replicate = Nucleus::createShuffleVector(insert, insert, swizzle);
6310 storeValue(replicate);
// Broadcast of a Float variable: loads it and assigns through the RValue<Float> path.
6313 Float4::Float4(const Float &rhs)
6317 *this = RValue<Float>(rhs.loadValue());
// Broadcast of a referenced Float: loads it and assigns through the RValue<Float> path.
6320 Float4::Float4(const Reference<Float> &rhs)
6324 *this = RValue<Float>(rhs.loadValue());
6327 RValue<Float4> Float4::operator=(float x) const
6329 return *this = Float4(x, x, x, x);
6332 RValue<Float4> Float4::operator=(RValue<Float4> rhs) const
6334 storeValue(rhs.value);
6339 RValue<Float4> Float4::operator=(const Float4 &rhs) const
6341 Value *value = rhs.loadValue();
6344 return RValue<Float4>(value);
6347 RValue<Float4> Float4::operator=(const Reference<Float4> &rhs) const
6349 Value *value = rhs.loadValue();
6352 return RValue<Float4>(value);
6355 RValue<Float4> Float4::operator=(RValue<Float> rhs) const
6357 return *this = Float4(rhs);
6360 RValue<Float4> Float4::operator=(const Float &rhs) const
6362 return *this = Float4(rhs);
6365 RValue<Float4> Float4::operator=(const Reference<Float> &rhs) const
6367 return *this = Float4(rhs);
6370 RValue<Float4> operator+(RValue<Float4> lhs, RValue<Float4> rhs)
6372 return RValue<Float4>(Nucleus::createFAdd(lhs.value, rhs.value));
6375 RValue<Float4> operator-(RValue<Float4> lhs, RValue<Float4> rhs)
6377 return RValue<Float4>(Nucleus::createFSub(lhs.value, rhs.value));
6380 RValue<Float4> operator*(RValue<Float4> lhs, RValue<Float4> rhs)
6382 return RValue<Float4>(Nucleus::createFMul(lhs.value, rhs.value));
6385 RValue<Float4> operator/(RValue<Float4> lhs, RValue<Float4> rhs)
6387 return RValue<Float4>(Nucleus::createFDiv(lhs.value, rhs.value));
6390 RValue<Float4> operator%(RValue<Float4> lhs, RValue<Float4> rhs)
6392 return RValue<Float4>(Nucleus::createFRem(lhs.value, rhs.value));
6395 RValue<Float4> operator+=(const Float4 &lhs, RValue<Float4> rhs)
6397 return lhs = lhs + rhs;
6400 RValue<Float4> operator-=(const Float4 &lhs, RValue<Float4> rhs)
6402 return lhs = lhs - rhs;
6405 RValue<Float4> operator*=(const Float4 &lhs, RValue<Float4> rhs)
6407 return lhs = lhs * rhs;
6410 RValue<Float4> operator/=(const Float4 &lhs, RValue<Float4> rhs)
6412 return lhs = lhs / rhs;
6415 RValue<Float4> operator%=(const Float4 &lhs, RValue<Float4> rhs)
6417 return lhs = lhs % rhs;
6420 RValue<Float4> operator+(RValue<Float4> val)
6425 RValue<Float4> operator-(RValue<Float4> val)
6427 return RValue<Float4>(Nucleus::createFNeg(val.value));
6430 RValue<Float4> Abs(RValue<Float4> x)
6432 Value *vector = Nucleus::createBitCast(x.value, Int4::getType());
6433 int64_t constantVector[4] = {0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
6434 Value *result = Nucleus::createAnd(vector, V(Nucleus::createConstantVector(constantVector, Int4::getType())));
6436 return RValue<Float4>(Nucleus::createBitCast(result, Float4::getType()));
6439 RValue<Float4> Max(RValue<Float4> x, RValue<Float4> y)
6441 return x86::maxps(x, y);
6444 RValue<Float4> Min(RValue<Float4> x, RValue<Float4> y)
6446 return x86::minps(x, y);
6449 RValue<Float4> Rcp_pp(RValue<Float4> x, bool exactAtPow2)
6453 // rcpps uses a piecewise-linear approximation which minimizes the relative error
6454 // but is not exact at power-of-two values. Rectify by multiplying by the inverse.
6455 return x86::rcpps(x) * Float4(1.0f / _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ps1(1.0f))));
6459 return x86::rcpps(x);
6463 RValue<Float4> RcpSqrt_pp(RValue<Float4> x)
6465 return x86::rsqrtps(x);
6468 RValue<Float4> Sqrt(RValue<Float4> x)
6470 return x86::sqrtps(x);
6473 RValue<Float4> Insert(RValue<Float4> val, RValue<Float> element, int i)
6475 return RValue<Float4>(Nucleus::createInsertElement(val.value, element.value, i));
6478 RValue<Float> Extract(RValue<Float4> x, int i)
6480 return RValue<Float>(Nucleus::createExtractElement(x.value, Float::getType(), i));
6483 RValue<Float4> Swizzle(RValue<Float4> x, unsigned char select)
6485 return RValue<Float4>(createSwizzle4(x.value, select));
6488 RValue<Float4> ShuffleLowHigh(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6492 ((imm >> 0) & 0x03) + 0,
6493 ((imm >> 2) & 0x03) + 0,
6494 ((imm >> 4) & 0x03) + 4,
6495 ((imm >> 6) & 0x03) + 4,
6498 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6501 RValue<Float4> UnpackLow(RValue<Float4> x, RValue<Float4> y)
6503 int shuffle[4] = {0, 4, 1, 5};
6504 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6507 RValue<Float4> UnpackHigh(RValue<Float4> x, RValue<Float4> y)
6509 int shuffle[4] = {2, 6, 3, 7};
6510 return RValue<Float4>(Nucleus::createShuffleVector(x.value, y.value, shuffle));
6513 RValue<Float4> Mask(Float4 &lhs, RValue<Float4> rhs, unsigned char select)
6515 Value *vector = lhs.loadValue();
6516 Value *shuffle = createMask4(vector, rhs.value, select);
6517 lhs.storeValue(shuffle);
6519 return RValue<Float4>(shuffle);
6522 RValue<Int> SignMask(RValue<Float4> x)
6524 return x86::movmskps(x);
6527 RValue<Int4> CmpEQ(RValue<Float4> x, RValue<Float4> y)
6529 // return As<Int4>(x86::cmpeqps(x, y));
6530 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOEQ(x.value, y.value), Int4::getType()));
6533 RValue<Int4> CmpLT(RValue<Float4> x, RValue<Float4> y)
6535 // return As<Int4>(x86::cmpltps(x, y));
6536 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLT(x.value, y.value), Int4::getType()));
6539 RValue<Int4> CmpLE(RValue<Float4> x, RValue<Float4> y)
6541 // return As<Int4>(x86::cmpleps(x, y));
6542 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOLE(x.value, y.value), Int4::getType()));
6545 RValue<Int4> CmpNEQ(RValue<Float4> x, RValue<Float4> y)
6547 // return As<Int4>(x86::cmpneqps(x, y));
6548 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpONE(x.value, y.value), Int4::getType()));
6551 RValue<Int4> CmpNLT(RValue<Float4> x, RValue<Float4> y)
6553 // return As<Int4>(x86::cmpnltps(x, y));
6554 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGE(x.value, y.value), Int4::getType()));
6557 RValue<Int4> CmpNLE(RValue<Float4> x, RValue<Float4> y)
6559 // return As<Int4>(x86::cmpnleps(x, y));
6560 return RValue<Int4>(Nucleus::createSExt(Nucleus::createFCmpOGT(x.value, y.value), Int4::getType()));
6563 RValue<Float4> Round(RValue<Float4> x)
6565 if(CPUID::supportsSSE4_1())
6567 return x86::roundps(x, 0);
6571 return Float4(RoundInt(x));
6575 RValue<Float4> Trunc(RValue<Float4> x)
6577 if(CPUID::supportsSSE4_1())
6579 return x86::roundps(x, 3);
6583 return Float4(Int4(x)); // Rounded toward zero
6587 RValue<Float4> Frac(RValue<Float4> x)
6589 if(CPUID::supportsSSE4_1())
6591 return x - x86::floorps(x);
6595 Float4 frc = x - Float4(Int4(x)); // Signed fractional part
6597 return frc + As<Float4>(As<Int4>(CmpNLE(Float4(0.0f), frc)) & As<Int4>(Float4(1, 1, 1, 1)));
6601 RValue<Float4> Floor(RValue<Float4> x)
6603 if(CPUID::supportsSSE4_1())
6605 return x86::floorps(x);
6613 RValue<Float4> Ceil(RValue<Float4> x)
6615 if(CPUID::supportsSSE4_1())
6617 return x86::ceilps(x);
6625 Type *Float4::getType()
6627 return T(VectorType::get(Float::getType(), 4));
6630 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, int offset)
6632 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), V(Nucleus::createConstantInt(offset))));
6635 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6637 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6640 RValue<Pointer<Byte>> operator+(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6642 return RValue<Pointer<Byte>>(Nucleus::createGEP(lhs.value, Byte::getType(), offset.value));
6645 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, int offset)
6647 return lhs = lhs + offset;
6650 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<Int> offset)
6652 return lhs = lhs + offset;
6655 RValue<Pointer<Byte>> operator+=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6657 return lhs = lhs + offset;
6660 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, int offset)
6662 return lhs + -offset;
6665 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<Int> offset)
6667 return lhs + -offset;
6670 RValue<Pointer<Byte>> operator-(RValue<Pointer<Byte>> lhs, RValue<UInt> offset)
6672 return lhs + -offset;
6675 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, int offset)
6677 return lhs = lhs - offset;
6680 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<Int> offset)
6682 return lhs = lhs - offset;
6685 RValue<Pointer<Byte>> operator-=(const Pointer<Byte> &lhs, RValue<UInt> offset)
6687 return lhs = lhs - offset;
6692 Nucleus::createRetVoid();
6693 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6694 Nucleus::createUnreachable();
6697 void Return(bool ret)
6699 Nucleus::createRet(V(Nucleus::createConstantBool(ret)));
6700 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6701 Nucleus::createUnreachable();
6704 void Return(const Int &ret)
6706 Nucleus::createRet(ret.loadValue());
6707 Nucleus::setInsertBlock(Nucleus::createBasicBlock());
6708 Nucleus::createUnreachable();
// Emits a conditional branch on cmp (to bodyBB when true, endBB otherwise) and
// makes bodyBB the current insertion block so the body is emitted next.
6711 bool branch(RValue<Bool> cmp, BasicBlock *bodyBB, BasicBlock *endBB)
6713 Nucleus::createCondBr(cmp.value, bodyBB, endBB);
6714 Nucleus::setInsertBlock(bodyBB);
6719 void endIf(BasicBlock *falseBB)
6721 ::falseBB = falseBB;
// Starts emitting an Else body into falseBB.
// Asserts that a preceding If supplied the block, removes the last instruction
// currently in it, and makes it the insertion block.
// NOTE(review): the removed instruction is presumably the branch emitted when the
// If was closed - confirm against the If/Else macros.
6724 bool elseBlock(BasicBlock *falseBB)
6726 assert(falseBB && "Else not preceded by If");
6727 falseBB->back().eraseFromParent();
6728 Nucleus::setInsertBlock(falseBB);
6733 BasicBlock *beginElse()
6735 BasicBlock *falseBB = ::falseBB;
6736 ::falseBB = nullptr;
6741 RValue<Long> Ticks()
6743 llvm::Function *rdtsc = Intrinsic::getDeclaration(::module, Intrinsic::readcyclecounter);
6745 return RValue<Long>(V(::builder->CreateCall(rdtsc)));
// Wraps the x86_sse_cvtss2si intrinsic: converts a scalar float to an Int.
// The intrinsic is called with a Float4 operand named vector.
// NOTE(review): the declaration of vector is not visible here; presumably it
// carries val in lane 0 - confirm.
6753 RValue<Int> cvtss2si(RValue<Float> val)
6755 llvm::Function *cvtss2si = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtss2si);
6760 return RValue<Int>(V(::builder->CreateCall(cvtss2si, RValue<Float4>(vector).value)));
6763 RValue<Int2> cvtps2pi(RValue<Float4> val)
6765 llvm::Function *cvtps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvtps2pi);
6767 return RValue<Int2>(V(::builder->CreateCall(cvtps2pi, val.value)));
6770 RValue<Int2> cvttps2pi(RValue<Float4> val)
6772 llvm::Function *cvttps2pi = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cvttps2pi);
6774 return RValue<Int2>(V(::builder->CreateCall(cvttps2pi, val.value)));
6777 RValue<Int4> cvtps2dq(RValue<Float4> val)
6779 if(CPUID::supportsSSE2())
6781 llvm::Function *cvtps2dq = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_cvtps2dq);
6783 return RValue<Int4>(V(::builder->CreateCall(cvtps2dq, val.value)));
6787 Int2 lo = x86::cvtps2pi(val);
6788 Int2 hi = x86::cvtps2pi(Swizzle(val, 0xEE));
6790 return Int4(lo, hi);
6794 RValue<Float> rcpss(RValue<Float> val)
6796 llvm::Function *rcpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ss);
6798 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6800 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rcpss, vector)), Float::getType(), 0));
6803 RValue<Float> sqrtss(RValue<Float> val)
6805 llvm::Function *sqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ss);
6807 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6809 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(sqrtss, vector)), Float::getType(), 0));
6812 RValue<Float> rsqrtss(RValue<Float> val)
6814 llvm::Function *rsqrtss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ss);
6816 Value *vector = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), val.value, 0);
6818 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall(rsqrtss, vector)), Float::getType(), 0));
6821 RValue<Float4> rcpps(RValue<Float4> val)
6823 llvm::Function *rcpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rcp_ps);
6825 return RValue<Float4>(V(::builder->CreateCall(rcpps, val.value)));
6828 RValue<Float4> sqrtps(RValue<Float4> val)
6830 llvm::Function *sqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_sqrt_ps);
6832 return RValue<Float4>(V(::builder->CreateCall(sqrtps, val.value)));
6835 RValue<Float4> rsqrtps(RValue<Float4> val)
6837 llvm::Function *rsqrtps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_rsqrt_ps);
6839 return RValue<Float4>(V(::builder->CreateCall(rsqrtps, val.value)));
6842 RValue<Float4> maxps(RValue<Float4> x, RValue<Float4> y)
6844 llvm::Function *maxps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_max_ps);
6846 return RValue<Float4>(V(::builder->CreateCall2(maxps, x.value, y.value)));
6849 RValue<Float4> minps(RValue<Float4> x, RValue<Float4> y)
6851 llvm::Function *minps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_min_ps);
6853 return RValue<Float4>(V(::builder->CreateCall2(minps, x.value, y.value)));
6856 RValue<Float> roundss(RValue<Float> val, unsigned char imm)
6858 llvm::Function *roundss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ss);
6860 Value *undef = V(UndefValue::get(Float4::getType()));
6861 Value *vector = Nucleus::createInsertElement(undef, val.value, 0);
6863 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(roundss, undef, vector, V(Nucleus::createConstantInt(imm)))), Float::getType(), 0));
6866 RValue<Float> floorss(RValue<Float> val)
6868 return roundss(val, 1);
6871 RValue<Float> ceilss(RValue<Float> val)
6873 return roundss(val, 2);
6876 RValue<Float4> roundps(RValue<Float4> val, unsigned char imm)
6878 llvm::Function *roundps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_round_ps);
6880 return RValue<Float4>(V(::builder->CreateCall2(roundps, val.value, V(Nucleus::createConstantInt(imm)))));
6883 RValue<Float4> floorps(RValue<Float4> val)
6885 return roundps(val, 1);
6888 RValue<Float4> ceilps(RValue<Float4> val)
6890 return roundps(val, 2);
6893 RValue<Float4> cmpps(RValue<Float4> x, RValue<Float4> y, unsigned char imm)
6895 llvm::Function *cmpps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ps);
6897 return RValue<Float4>(V(::builder->CreateCall3(cmpps, x.value, y.value, V(Nucleus::createConstantByte(imm)))));
6900 RValue<Float4> cmpeqps(RValue<Float4> x, RValue<Float4> y)
6902 return cmpps(x, y, 0);
6905 RValue<Float4> cmpltps(RValue<Float4> x, RValue<Float4> y)
6907 return cmpps(x, y, 1);
6910 RValue<Float4> cmpleps(RValue<Float4> x, RValue<Float4> y)
6912 return cmpps(x, y, 2);
6915 RValue<Float4> cmpunordps(RValue<Float4> x, RValue<Float4> y)
6917 return cmpps(x, y, 3);
6920 RValue<Float4> cmpneqps(RValue<Float4> x, RValue<Float4> y)
6922 return cmpps(x, y, 4);
6925 RValue<Float4> cmpnltps(RValue<Float4> x, RValue<Float4> y)
6927 return cmpps(x, y, 5);
6930 RValue<Float4> cmpnleps(RValue<Float4> x, RValue<Float4> y)
6932 return cmpps(x, y, 6);
6935 RValue<Float4> cmpordps(RValue<Float4> x, RValue<Float4> y)
6937 return cmpps(x, y, 7);
6940 RValue<Float> cmpss(RValue<Float> x, RValue<Float> y, unsigned char imm)
6942 llvm::Function *cmpss = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_cmp_ss);
6944 Value *vector1 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), x.value, 0);
6945 Value *vector2 = Nucleus::createInsertElement(V(UndefValue::get(Float4::getType())), y.value, 0);
6947 return RValue<Float>(Nucleus::createExtractElement(V(::builder->CreateCall3(cmpss, vector1, vector2, V(Nucleus::createConstantByte(imm)))), Float::getType(), 0));
6950 RValue<Float> cmpeqss(RValue<Float> x, RValue<Float> y)
6952 return cmpss(x, y, 0);
6955 RValue<Float> cmpltss(RValue<Float> x, RValue<Float> y)
6957 return cmpss(x, y, 1);
6960 RValue<Float> cmpless(RValue<Float> x, RValue<Float> y)
6962 return cmpss(x, y, 2);
6965 RValue<Float> cmpunordss(RValue<Float> x, RValue<Float> y)
6967 return cmpss(x, y, 3);
6970 RValue<Float> cmpneqss(RValue<Float> x, RValue<Float> y)
6972 return cmpss(x, y, 4);
6975 RValue<Float> cmpnltss(RValue<Float> x, RValue<Float> y)
6977 return cmpss(x, y, 5);
6980 RValue<Float> cmpnless(RValue<Float> x, RValue<Float> y)
6982 return cmpss(x, y, 6);
6985 RValue<Float> cmpordss(RValue<Float> x, RValue<Float> y)
6987 return cmpss(x, y, 7);
6990 RValue<Int4> pabsd(RValue<Int4> x)
6992 llvm::Function *pabsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_ssse3_pabs_d_128);
6994 return RValue<Int4>(V(::builder->CreateCall(pabsd, x.value)));
6997 RValue<Short4> paddsw(RValue<Short4> x, RValue<Short4> y)
6999 llvm::Function *paddsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_w);
7001 return As<Short4>(V(::builder->CreateCall2(paddsw, As<MMX>(x).value, As<MMX>(y).value)));
7004 RValue<Short4> psubsw(RValue<Short4> x, RValue<Short4> y)
7006 llvm::Function *psubsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_w);
7008 return As<Short4>(V(::builder->CreateCall2(psubsw, As<MMX>(x).value, As<MMX>(y).value)));
7011 RValue<UShort4> paddusw(RValue<UShort4> x, RValue<UShort4> y)
7013 llvm::Function *paddusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_w);
7015 return As<UShort4>(V(::builder->CreateCall2(paddusw, As<MMX>(x).value, As<MMX>(y).value)));
7018 RValue<UShort4> psubusw(RValue<UShort4> x, RValue<UShort4> y)
7020 llvm::Function *psubusw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_w);
7022 return As<UShort4>(V(::builder->CreateCall2(psubusw, As<MMX>(x).value, As<MMX>(y).value)));
7025 RValue<SByte8> paddsb(RValue<SByte8> x, RValue<SByte8> y)
7027 llvm::Function *paddsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padds_b);
7029 return As<SByte8>(V(::builder->CreateCall2(paddsb, As<MMX>(x).value, As<MMX>(y).value)));
7032 RValue<SByte8> psubsb(RValue<SByte8> x, RValue<SByte8> y)
7034 llvm::Function *psubsb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubs_b);
7036 return As<SByte8>(V(::builder->CreateCall2(psubsb, As<MMX>(x).value, As<MMX>(y).value)));
7039 RValue<Byte8> paddusb(RValue<Byte8> x, RValue<Byte8> y)
7041 llvm::Function *paddusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_paddus_b);
7043 return As<Byte8>(V(::builder->CreateCall2(paddusb, As<MMX>(x).value, As<MMX>(y).value)));
7046 RValue<Byte8> psubusb(RValue<Byte8> x, RValue<Byte8> y)
7048 llvm::Function *psubusb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psubus_b);
7050 return As<Byte8>(V(::builder->CreateCall2(psubusb, As<MMX>(x).value, As<MMX>(y).value)));
7053 RValue<Short4> paddw(RValue<Short4> x, RValue<Short4> y)
7055 llvm::Function *paddw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_w);
7057 return As<Short4>(V(::builder->CreateCall2(paddw, As<MMX>(x).value, As<MMX>(y).value)));
7060 RValue<Short4> psubw(RValue<Short4> x, RValue<Short4> y)
7062 llvm::Function *psubw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_w);
7064 return As<Short4>(V(::builder->CreateCall2(psubw, As<MMX>(x).value, As<MMX>(y).value)));
7067 RValue<Short4> pmullw(RValue<Short4> x, RValue<Short4> y)
7069 llvm::Function *pmullw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmull_w);
7071 return As<Short4>(V(::builder->CreateCall2(pmullw, As<MMX>(x).value, As<MMX>(y).value)));
7074 RValue<Short4> pand(RValue<Short4> x, RValue<Short4> y)
7076 llvm::Function *pand = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pand);
7078 return As<Short4>(V(::builder->CreateCall2(pand, As<MMX>(x).value, As<MMX>(y).value)));
7081 RValue<Short4> por(RValue<Short4> x, RValue<Short4> y)
7083 llvm::Function *por = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_por);
7085 return As<Short4>(V(::builder->CreateCall2(por, As<MMX>(x).value, As<MMX>(y).value)));
7088 RValue<Short4> pxor(RValue<Short4> x, RValue<Short4> y)
7090 llvm::Function *pxor = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pxor);
7092 return As<Short4>(V(::builder->CreateCall2(pxor, As<MMX>(x).value, As<MMX>(y).value)));
7095 RValue<Short4> pshufw(RValue<Short4> x, unsigned char y)
7097 llvm::Function *pshufw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_pshuf_w);
7099 return As<Short4>(V(::builder->CreateCall2(pshufw, As<MMX>(x).value, V(Nucleus::createConstantByte(y)))));
7102 RValue<Int2> punpcklwd(RValue<Short4> x, RValue<Short4> y)
7104 llvm::Function *punpcklwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklwd);
7106 return As<Int2>(V(::builder->CreateCall2(punpcklwd, As<MMX>(x).value, As<MMX>(y).value)));
7109 RValue<Int2> punpckhwd(RValue<Short4> x, RValue<Short4> y)
7111 llvm::Function *punpckhwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhwd);
7113 return As<Int2>(V(::builder->CreateCall2(punpckhwd, As<MMX>(x).value, As<MMX>(y).value)));
7116 RValue<Short4> pinsrw(RValue<Short4> x, RValue<Int> y, unsigned int i)
7118 llvm::Function *pinsrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pinsr_w);
7120 return As<Short4>(V(::builder->CreateCall3(pinsrw, As<MMX>(x).value, y.value, V(Nucleus::createConstantInt(i)))));
7123 RValue<Int> pextrw(RValue<Short4> x, unsigned int i)
7125 llvm::Function *pextrw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pextr_w);
7127 return RValue<Int>(V(::builder->CreateCall2(pextrw, As<MMX>(x).value, V(Nucleus::createConstantInt(i)))));
7130 RValue<Long1> punpckldq(RValue<Int2> x, RValue<Int2> y)
7132 llvm::Function *punpckldq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckldq);
7134 return As<Long1>(V(::builder->CreateCall2(punpckldq, As<MMX>(x).value, As<MMX>(y).value)));
7137 RValue<Long1> punpckhdq(RValue<Int2> x, RValue<Int2> y)
7139 llvm::Function *punpckhdq = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhdq);
7141 return As<Long1>(V(::builder->CreateCall2(punpckhdq, As<MMX>(x).value, As<MMX>(y).value)));
7144 RValue<Short4> punpcklbw(RValue<Byte8> x, RValue<Byte8> y)
7146 llvm::Function *punpcklbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpcklbw);
7148 return As<Short4>(V(::builder->CreateCall2(punpcklbw, As<MMX>(x).value, As<MMX>(y).value)));
7151 RValue<Short4> punpckhbw(RValue<Byte8> x, RValue<Byte8> y)
7153 llvm::Function *punpckhbw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_punpckhbw);
7155 return As<Short4>(V(::builder->CreateCall2(punpckhbw, As<MMX>(x).value, As<MMX>(y).value)));
7158 RValue<Byte8> paddb(RValue<Byte8> x, RValue<Byte8> y)
7160 llvm::Function *paddb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_b);
7162 return As<Byte8>(V(::builder->CreateCall2(paddb, As<MMX>(x).value, As<MMX>(y).value)));
7165 RValue<Byte8> psubb(RValue<Byte8> x, RValue<Byte8> y)
7167 llvm::Function *psubb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_b);
7169 return As<Byte8>(V(::builder->CreateCall2(psubb, As<MMX>(x).value, As<MMX>(y).value)));
7172 RValue<Int2> paddd(RValue<Int2> x, RValue<Int2> y)
7174 llvm::Function *paddd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_padd_d);
7176 return As<Int2>(V(::builder->CreateCall2(paddd, As<MMX>(x).value, As<MMX>(y).value)));
7179 RValue<Int2> psubd(RValue<Int2> x, RValue<Int2> y)
7181 llvm::Function *psubd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psub_d);
7183 return As<Int2>(V(::builder->CreateCall2(psubd, As<MMX>(x).value, As<MMX>(y).value)));
7186 RValue<UShort4> pavgw(RValue<UShort4> x, RValue<UShort4> y)
7188 llvm::Function *pavgw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pavg_w);
7190 return As<UShort4>(V(::builder->CreateCall2(pavgw, As<MMX>(x).value, As<MMX>(y).value)));
7193 RValue<Short4> pmaxsw(RValue<Short4> x, RValue<Short4> y)
7195 llvm::Function *pmaxsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmaxs_w);
7197 return As<Short4>(V(::builder->CreateCall2(pmaxsw, As<MMX>(x).value, As<MMX>(y).value)));
7200 RValue<Short4> pminsw(RValue<Short4> x, RValue<Short4> y)
7202 llvm::Function *pminsw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmins_w);
7204 return As<Short4>(V(::builder->CreateCall2(pminsw, As<MMX>(x).value, As<MMX>(y).value)));
7207 RValue<Short4> pcmpgtw(RValue<Short4> x, RValue<Short4> y)
7209 llvm::Function *pcmpgtw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_w);
7211 return As<Short4>(V(::builder->CreateCall2(pcmpgtw, As<MMX>(x).value, As<MMX>(y).value)));
7214 RValue<Short4> pcmpeqw(RValue<Short4> x, RValue<Short4> y)
7216 llvm::Function *pcmpeqw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_w);
7218 return As<Short4>(V(::builder->CreateCall2(pcmpeqw, As<MMX>(x).value, As<MMX>(y).value)));
7221 RValue<Byte8> pcmpgtb(RValue<SByte8> x, RValue<SByte8> y)
7223 llvm::Function *pcmpgtb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpgt_b);
7225 return As<Byte8>(V(::builder->CreateCall2(pcmpgtb, As<MMX>(x).value, As<MMX>(y).value)));
7228 RValue<Byte8> pcmpeqb(RValue<Byte8> x, RValue<Byte8> y)
7230 llvm::Function *pcmpeqb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pcmpeq_b);
7232 return As<Byte8>(V(::builder->CreateCall2(pcmpeqb, As<MMX>(x).value, As<MMX>(y).value)));
7235 RValue<Short4> packssdw(RValue<Int2> x, RValue<Int2> y)
7237 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packssdw);
7239 return As<Short4>(V(::builder->CreateCall2(packssdw, As<MMX>(x).value, As<MMX>(y).value)));
7242 RValue<Short8> packssdw(RValue<Int4> x, RValue<Int4> y)
7244 if(CPUID::supportsSSE2())
7246 llvm::Function *packssdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_packssdw_128);
7248 return RValue<Short8>(V(::builder->CreateCall2(packssdw, x.value, y.value)));
7253 Int2 hiX = Int2(Swizzle(x, 0xEE));
7256 Int2 hiY = Int2(Swizzle(y, 0xEE));
7258 Short4 lo = x86::packssdw(loX, hiX);
7259 Short4 hi = x86::packssdw(loY, hiY);
7261 return Short8(lo, hi);
7265 RValue<SByte8> packsswb(RValue<Short4> x, RValue<Short4> y)
7267 llvm::Function *packsswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packsswb);
7269 return As<SByte8>(V(::builder->CreateCall2(packsswb, As<MMX>(x).value, As<MMX>(y).value)));
7272 RValue<Byte8> packuswb(RValue<UShort4> x, RValue<UShort4> y)
7274 llvm::Function *packuswb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_packuswb);
7276 return As<Byte8>(V(::builder->CreateCall2(packuswb, As<MMX>(x).value, As<MMX>(y).value)));
7279 RValue<UShort8> packusdw(RValue<UInt4> x, RValue<UInt4> y)
7281 if(CPUID::supportsSSE4_1())
7283 llvm::Function *packusdw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_packusdw);
7285 return RValue<UShort8>(V(::builder->CreateCall2(packusdw, x.value, y.value)));
7289 // FIXME: Not an exact replacement!
7290 return As<UShort8>(packssdw(As<Int4>(x - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000)), As<Int4>(y - UInt4(0x00008000, 0x00008000, 0x00008000, 0x00008000))) + Short8(0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u, 0x8000u));
7294 RValue<UShort4> psrlw(RValue<UShort4> x, unsigned char y)
7296 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_w);
7298 return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7301 RValue<UShort8> psrlw(RValue<UShort8> x, unsigned char y)
7303 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_w);
7305 return RValue<UShort8>(V(::builder->CreateCall2(psrlw, x.value, V(Nucleus::createConstantInt(y)))));
7308 RValue<Short4> psraw(RValue<Short4> x, unsigned char y)
7310 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_w);
7312 return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7315 RValue<Short8> psraw(RValue<Short8> x, unsigned char y)
7317 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_w);
7319 return RValue<Short8>(V(::builder->CreateCall2(psraw, x.value, V(Nucleus::createConstantInt(y)))));
7322 RValue<Short4> psllw(RValue<Short4> x, unsigned char y)
7324 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_w);
7326 return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7329 RValue<Short8> psllw(RValue<Short8> x, unsigned char y)
7331 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_w);
7333 return RValue<Short8>(V(::builder->CreateCall2(psllw, x.value, V(Nucleus::createConstantInt(y)))));
7336 RValue<Int2> pslld(RValue<Int2> x, unsigned char y)
7338 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pslli_d);
7340 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// Int4 overload of pslld taking the count y as an unsigned char.
// When CPUID::supportsSSE2() is true, a single call to the x86_sse2_pslli_d
// intrinsic is emitted on x.value with y as a constant integer operand.
7343 RValue<Int4> pslld(RValue<Int4> x, unsigned char y)
7345 if(CPUID::supportsSSE2())
7347 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pslli_d);
7349 return RValue<Int4>(V(::builder->CreateCall2(pslld, x.value, V(Nucleus::createConstantInt(y)))));
// Remaining statements: two Int2 parts are shifted separately with the Int2
// overload of x86::pslld and recombined into an Int4.
// NOTE(review): the declaration of `lo` is not visible in this listing -
// presumably the lower half of x; confirm against the full file.
7354 Int2 hi = Int2(Swizzle(x, 0xEE));
7356 lo = x86::pslld(lo, y);
7357 hi = x86::pslld(hi, y);
7359 return Int4(lo, hi);
7363 RValue<Int2> psrad(RValue<Int2> x, unsigned char y)
7365 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrai_d);
7367 return As<Int2>(V(::builder->CreateCall2(psrad, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
// Int4 overload of psrad taking the count y as an unsigned char.
// When CPUID::supportsSSE2() is true, a single call to the x86_sse2_psrai_d
// intrinsic is emitted on x.value with y as a constant integer operand.
7370 RValue<Int4> psrad(RValue<Int4> x, unsigned char y)
7372 if(CPUID::supportsSSE2())
7374 llvm::Function *psrad = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrai_d);
7376 return RValue<Int4>(V(::builder->CreateCall2(psrad, x.value, V(Nucleus::createConstantInt(y)))));
// Remaining statements: two Int2 parts are shifted separately with the Int2
// overload of x86::psrad and recombined into an Int4.
// NOTE(review): the declaration of `lo` is not visible in this listing -
// presumably the lower half of x; confirm against the full file.
7381 Int2 hi = Int2(Swizzle(x, 0xEE));
7383 lo = x86::psrad(lo, y);
7384 hi = x86::psrad(hi, y);
7386 return Int4(lo, hi);
7390 RValue<UInt2> psrld(RValue<UInt2> x, unsigned char y)
7392 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrli_d);
7394 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, V(Nucleus::createConstantInt(y)))));
7397 RValue<UInt4> psrld(RValue<UInt4> x, unsigned char y)
7399 if(CPUID::supportsSSE2())
7401 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_psrli_d);
7403 return RValue<UInt4>(V(::builder->CreateCall2(psrld, x.value, V(Nucleus::createConstantInt(y)))));
7407 UInt2 lo = As<UInt2>(Int2(As<Int4>(x)));
7408 UInt2 hi = As<UInt2>(Int2(Swizzle(As<Int4>(x), 0xEE)));
7410 lo = x86::psrld(lo, y);
7411 hi = x86::psrld(hi, y);
7413 return UInt4(lo, hi);
7417 RValue<UShort4> psrlw(RValue<UShort4> x, RValue<Long1> y)
7419 llvm::Function *psrlw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_w);
7421 return As<UShort4>(V(::builder->CreateCall2(psrlw, As<MMX>(x).value, As<MMX>(y).value)));
7424 RValue<Short4> psraw(RValue<Short4> x, RValue<Long1> y)
7426 llvm::Function *psraw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_w);
7428 return As<Short4>(V(::builder->CreateCall2(psraw, As<MMX>(x).value, As<MMX>(y).value)));
7431 RValue<Short4> psllw(RValue<Short4> x, RValue<Long1> y)
7433 llvm::Function *psllw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_w);
7435 return As<Short4>(V(::builder->CreateCall2(psllw, As<MMX>(x).value, As<MMX>(y).value)));
7438 RValue<Int2> pslld(RValue<Int2> x, RValue<Long1> y)
7440 llvm::Function *pslld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psll_d);
7442 return As<Int2>(V(::builder->CreateCall2(pslld, As<MMX>(x).value, As<MMX>(y).value)));
7445 RValue<UInt2> psrld(RValue<UInt2> x, RValue<Long1> y)
7447 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psrl_d);
7449 return As<UInt2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7452 RValue<Int2> psrad(RValue<Int2> x, RValue<Long1> y)
7454 llvm::Function *psrld = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_psra_d);
7456 return As<Int2>(V(::builder->CreateCall2(psrld, As<MMX>(x).value, As<MMX>(y).value)));
7459 RValue<Int4> pmaxsd(RValue<Int4> x, RValue<Int4> y)
7461 llvm::Function *pmaxsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxsd);
7463 return RValue<Int4>(V(::builder->CreateCall2(pmaxsd, x.value, y.value)));
7466 RValue<Int4> pminsd(RValue<Int4> x, RValue<Int4> y)
7468 llvm::Function *pminsd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminsd);
7470 return RValue<Int4>(V(::builder->CreateCall2(pminsd, x.value, y.value)));
7473 RValue<UInt4> pmaxud(RValue<UInt4> x, RValue<UInt4> y)
7475 llvm::Function *pmaxud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmaxud);
7477 return RValue<UInt4>(V(::builder->CreateCall2(pmaxud, x.value, y.value)));
7480 RValue<UInt4> pminud(RValue<UInt4> x, RValue<UInt4> y)
7482 llvm::Function *pminud = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pminud);
7484 return RValue<UInt4>(V(::builder->CreateCall2(pminud, x.value, y.value)));
7487 RValue<Short4> pmulhw(RValue<Short4> x, RValue<Short4> y)
7489 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulh_w);
7491 return As<Short4>(V(::builder->CreateCall2(pmulhw, As<MMX>(x).value, As<MMX>(y).value)));
7494 RValue<UShort4> pmulhuw(RValue<UShort4> x, RValue<UShort4> y)
7496 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmulhu_w);
7498 return As<UShort4>(V(::builder->CreateCall2(pmulhuw, As<MMX>(x).value, As<MMX>(y).value)));
7501 RValue<Int2> pmaddwd(RValue<Short4> x, RValue<Short4> y)
7503 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmadd_wd);
7505 return As<Int2>(V(::builder->CreateCall2(pmaddwd, As<MMX>(x).value, As<MMX>(y).value)));
7508 RValue<Short8> pmulhw(RValue<Short8> x, RValue<Short8> y)
7510 llvm::Function *pmulhw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulh_w);
7512 return RValue<Short8>(V(::builder->CreateCall2(pmulhw, x.value, y.value)));
7515 RValue<UShort8> pmulhuw(RValue<UShort8> x, RValue<UShort8> y)
7517 llvm::Function *pmulhuw = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmulhu_w);
7519 return RValue<UShort8>(V(::builder->CreateCall2(pmulhuw, x.value, y.value)));
7522 RValue<Int4> pmaddwd(RValue<Short8> x, RValue<Short8> y)
7524 llvm::Function *pmaddwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse2_pmadd_wd);
7526 return RValue<Int4>(V(::builder->CreateCall2(pmaddwd, x.value, y.value)));
7529 RValue<Int> movmskps(RValue<Float4> x)
7531 llvm::Function *movmskps = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse_movmsk_ps);
7533 return RValue<Int>(V(::builder->CreateCall(movmskps, x.value)));
7536 RValue<Int> pmovmskb(RValue<Byte8> x)
7538 llvm::Function *pmovmskb = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_pmovmskb);
7540 return RValue<Int>(V(::builder->CreateCall(pmovmskb, As<MMX>(x).value)));
7543 //RValue<Int2> movd(RValue<Pointer<Int>> x)
7545 // Value *element = Nucleus::createLoad(x.value);
7547 //// Value *int2 = UndefValue::get(Int2::getType());
7548 //// int2 = Nucleus::createInsertElement(int2, element, ConstantInt::get(Int::getType(), 0));
7550 // Value *int2 = Nucleus::createBitCast(Nucleus::createZExt(element, Long::getType()), Int2::getType());
7552 // return RValue<Int2>(int2);
7555 //RValue<Int2> movdq2q(RValue<Int4> x)
7557 // Value *long2 = Nucleus::createBitCast(x.value, T(VectorType::get(Long::getType(), 2)));
7558 // Value *element = Nucleus::createExtractElement(long2, ConstantInt::get(Int::getType(), 0));
7560 // return RValue<Int2>(Nucleus::createBitCast(element, Int2::getType()));
7563 RValue<Int4> pmovzxbd(RValue<Int4> x)
7565 llvm::Function *pmovzxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxbd);
7567 return RValue<Int4>(V(::builder->CreateCall(pmovzxbd, Nucleus::createBitCast(x.value, Byte16::getType()))));
7570 RValue<Int4> pmovsxbd(RValue<Int4> x)
7572 llvm::Function *pmovsxbd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxbd);
7574 return RValue<Int4>(V(::builder->CreateCall(pmovsxbd, Nucleus::createBitCast(x.value, SByte16::getType()))));
7577 RValue<Int4> pmovzxwd(RValue<Int4> x)
7579 llvm::Function *pmovzxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovzxwd);
7581 return RValue<Int4>(V(::builder->CreateCall(pmovzxwd, Nucleus::createBitCast(x.value, UShort8::getType()))));
7584 RValue<Int4> pmovsxwd(RValue<Int4> x)
7586 llvm::Function *pmovsxwd = Intrinsic::getDeclaration(::module, Intrinsic::x86_sse41_pmovsxwd);
7588 return RValue<Int4>(V(::builder->CreateCall(pmovsxwd, Nucleus::createBitCast(x.value, Short8::getType()))));
7593 llvm::Function *emms = Intrinsic::getDeclaration(::module, Intrinsic::x86_mmx_emms);
7595 V(::builder->CreateCall(emms));