std::ostream& operator<<(std::ostream& os, const DividePattern& pattern);
-// Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+/**
+ * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers").
+ * @details Without context sensitive analysis, the most conservative set of barriers
+ * must be issued to ensure the Java Memory Model. Thus the recipe is as follows:
+ * -# Use StoreStore barrier before volatile store.
+ * -# Use StoreLoad barrier after volatile store.
+ * -# Use LoadLoad and LoadStore barrier after each volatile load.
+ * -# Use StoreStore barrier after all stores but before return from any constructor whose
+ * class has final fields.
+ */
enum MemBarrierKind {
kLoadStore,
kLoadLoad,
kPCRelFixup, // x86 FIXME: add NEEDS_FIXUP to instruction attributes.
kRegDef0,
kRegDef1,
+ kRegDef2,
kRegDefA,
kRegDefD,
kRegDefFPCSList0,
void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
+ // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
+ LIR* barrier = last_lir_insn_;
+
int dmb_flavor;
// TODO: revisit Arm barrier kinds
switch (barrier_kind) {
dmb_flavor = kSY;  // quiet gcc.
break;
}
- LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
- dmb->u.m.def_mask = ENCODE_ALL;
+
+ // If the last LIR is already an identical dmb, don't generate another.
+ // Note: when barrier == nullptr the || short-circuits, so no explicit non-null re-check is needed.
+ if (barrier == nullptr || barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor) {
+   barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+ }
+
+ // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+ DCHECK(!barrier->flags.use_def_invalid);
+ barrier->u.m.def_mask = ENCODE_ALL;
#endif
}
rl_src = LoadValue(rl_src, kAnyReg);
}
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
if (is_long_or_double) {
StoreWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg.GetReg());
}
if (field_info.IsVolatile()) {
+ // A load might follow the volatile store so insert a StoreLoad barrier.
GenMemBarrier(kStoreLoad);
}
if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
}
// r_base now holds static storage base
RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
- if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
- }
+
if (is_long_or_double) {
LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg(),
rl_result.reg.GetHighReg(), INVALID_SREG);
LoadWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg());
}
FreeTemp(r_base);
+
+ if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
+ GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
+ }
+
if (is_long_or_double) {
StoreValueWide(rl_dest, rl_result);
} else {
rl_result.reg.GetHighReg(), rl_obj.s_reg_low);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
} else {
int reg_ptr = AllocTemp();
LoadBaseDispWide(reg_ptr, 0, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(),
INVALID_SREG);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
FreeTemp(reg_ptr);
}
rl_result.reg.GetReg(), kWord, rl_obj.s_reg_low);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
StoreValue(rl_dest, rl_result);
}
reg_ptr = AllocTemp();
OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value());
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
StoreBaseDispWide(reg_ptr, 0, rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
FreeTemp(reg_ptr);
} else {
rl_src = LoadValue(rl_src, reg_class);
GenNullCheck(rl_obj.reg.GetReg(), opt_flags);
if (field_info.IsVolatile()) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
StoreBaseDisp(rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value(),
rl_src.reg.GetReg(), kWord);
MarkPossibleNullPointerException(opt_flags);
if (field_info.IsVolatile()) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
MarkGCCard(rl_src.reg.GetReg(), rl_obj.reg.GetReg());
RegLocation rl_src_offset = info->args[2]; // long low
rl_src_offset.wide = 0; // ignore high half in info->args[3]
RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info); // result reg
- if (is_volatile) {
- GenMemBarrier(kLoadLoad);
- }
+
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
if (is_long) {
OpRegReg(kOpAdd, rl_object.reg.GetReg(), rl_offset.reg.GetReg());
LoadBaseDispWide(rl_object.reg.GetReg(), 0, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(), INVALID_SREG);
- StoreValueWide(rl_dest, rl_result);
} else {
LoadBaseIndexed(rl_object.reg.GetReg(), rl_offset.reg.GetReg(), rl_result.reg.GetReg(), 0, kWord);
+ }
+
+ if (is_volatile) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
+ GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
+ }
+
+ if (is_long) {
+ StoreValueWide(rl_dest, rl_result);
+ } else {
StoreValue(rl_dest, rl_result);
}
return true;
rl_src_offset.wide = 0; // ignore high half in info->args[3]
RegLocation rl_src_value = info->args[4]; // value to store
if (is_volatile || is_ordered) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
// Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
FreeTemp(rl_offset.reg.GetReg());
+
if (is_volatile) {
+ // A load might follow the volatile store so insert a StoreLoad barrier.
GenMemBarrier(kStoreLoad);
}
if (is_object) {
((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads.
((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
(REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores.
- !(target_flags & (IS_LOAD | IS_STORE))) {
+ // Skip instructions that are neither loads nor stores.
+ !(target_flags & (IS_LOAD | IS_STORE)) ||
+ // Skip instructions that do both load and store.
+ ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
continue;
}
/* Skip non-interesting instructions */
if (!(target_flags & IS_LOAD) ||
(this_lir->flags.is_nop == true) ||
- ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1))) {
+ ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
+ ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
continue;
}
SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
}
+ if (flags & REG_DEF2) {
+ SetupRegMask(&lir->u.m.def_mask, lir->operands[2]);
+ }
+
if (flags & REG_USE0) {
SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
}
LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg.GetReg(), kWord, INVALID_SREG);
}
if (data.is_volatile) {
+ // Without context sensitive analysis, we must issue the most conservative barriers.
+ // In this case, either a load or store may follow so we issue both barriers.
GenMemBarrier(kLoadLoad);
+ GenMemBarrier(kLoadStore);
}
return true;
}
int reg_obj = LoadArg(data.object_arg);
int reg_src = LoadArg(data.src_arg, wide);
if (data.is_volatile) {
+ // There might have been a store before this volatile one so insert StoreStore barrier.
GenMemBarrier(kStoreStore);
}
if (wide) {
StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
}
if (data.is_volatile) {
- GenMemBarrier(kLoadLoad);
+ // A load might follow the volatile store so insert a StoreLoad barrier.
+ GenMemBarrier(kStoreLoad);
}
if (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT)) {
MarkGCCard(reg_src, reg_obj);
#define NO_OPERAND (1ULL << kNoOperand)
#define REG_DEF0 (1ULL << kRegDef0)
#define REG_DEF1 (1ULL << kRegDef1)
+#define REG_DEF2 (1ULL << kRegDef2)
#define REG_DEFA (1ULL << kRegDefA)
#define REG_DEFD (1ULL << kRegDefD)
#define REG_DEF_FPCS_LIST0 (1ULL << kRegDefFPCSList0)
*/
virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
+ /**
+  * @brief Used to generate a memory barrier in an architecture-specific way.
+  * @details The last generated LIR will be considered for use as a barrier. Namely,
+  * if the last LIR can be updated in a way where it will serve the semantics of
+  * a barrier, then it will be used as such. Otherwise, a new LIR will be generated
+  * that provides the required semantics.
+  * @param barrier_kind The kind of memory barrier to generate.
+  */
virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
+
virtual void GenMoveException(RegLocation rl_dest) = 0;
virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
RegLocation rl_result, int lit, int first_bit,
{ kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
{ kX86LockCmpxchg8bM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" },
{ kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
+ { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" },
EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
void GenConstWide(RegLocation rl_dest, int64_t value);
+ static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
+
/*
* @brief generate inline code for fast case of Strng.indexOf.
* @param info Call parameters
: (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
LoadWordDisp(TargetReg(kSp), srcOffsetSp, rSI);
NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
+
+ // After a store we need to insert barrier in case of potential load. Since the
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+ GenMemBarrier(kStoreLoad);
+
FreeTemp(rSI);
UnmarkTemp(rSI);
NewLIR1(kX86Pop32R, rSI);
FlushReg(r0);
LockTemp(r0);
- // Release store semantics, get the barrier out of the way. TODO: revisit
- GenMemBarrier(kStoreLoad);
-
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
LoadValueDirect(rl_src_expected, r0);
NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
+ // After a store we need to insert barrier in case of potential load. Since the
+ // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+ GenMemBarrier(kStoreLoad);
+
FreeTemp(r0);
}
FreeTemp(rX86_ARG3);
}
+bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
+  // Locked (or, for xchg, implicitly locked) read-modify-write instructions and
+  // mfence carry full memory barrier semantics on x86.
+  switch (opcode) {
+    case kX86LockCmpxchgMR:
+    case kX86LockCmpxchgAR:
+    case kX86LockCmpxchg8bM:
+    case kX86LockCmpxchg8bA:
+    case kX86XchgMR:
+    case kX86Mfence:
+      // Atomic memory instructions provide full barrier.
+      return true;
+    default:
+      // Conservative if cannot prove it provides full barrier.
+      return false;
+  }
+}
+
void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
#if ANDROID_SMP != 0
- // TODO: optimize fences
- NewLIR0(kX86Mfence);
+ // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
+ LIR* mem_barrier = last_lir_insn_;
+
+ /*
+  * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need a memory fence. All other
+  * barriers (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases,
+  * all we need to ensure is that there is a scheduling barrier in place.
+  */
+ if (barrier_kind == kStoreLoad) {
+   // Insert an mfence unless the last LIR already provides a full barrier. (A freshly
+   // emitted mfence itself provides one, so a single condition covers both the "no
+   // previous LIR" case and the "previous LIR is not a full barrier" case.)
+   if (mem_barrier == nullptr ||
+       !ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
+     mem_barrier = NewLIR0(kX86Mfence);
+   }
+ }
+
+ // Now ensure that a scheduling barrier is in place.
+ if (mem_barrier == nullptr) {
+   GenBarrier();
+ } else {
+   // Mark as a scheduling barrier.
+   DCHECK(!mem_barrier->flags.use_def_invalid);
+   mem_barrier->u.m.def_mask = ENCODE_ALL;
+ }
#endif
}
kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange
kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange
kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange
+ kX86XchgMR, // exchange memory with register (automatically locked)
Binary0fOpCode(kX86Movzx8), // zero-extend 8-bit value
Binary0fOpCode(kX86Movzx16), // zero-extend 16-bit value
Binary0fOpCode(kX86Movsx8), // sign-extend 8-bit value
opcode << "j" << condition_codes[*instr & 0xF];
branch_bytes = 1;
break;
+ case 0x86: case 0x87:
+ opcode << "xchg";
+ store = true;
+ has_modrm = true;
+ byte_operand = (*instr == 0x86);
+ break;
case 0x88: opcode << "mov"; store = true; has_modrm = true; byte_operand = true; break;
case 0x89: opcode << "mov"; store = true; has_modrm = true; break;
case 0x8A: opcode << "mov"; load = true; has_modrm = true; byte_operand = true; break;