2 * Copyright (C) 2009 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * This file contains codegen for the Thumb ISA and is intended to be
21 * Codegen-$(TARGET_ARCH_VARIANT).c
/*
 * Negate the float value described by rlSrc and store it to rlDest.
 * The source is loaded into an FP register, a kThumb2Vnegs instruction is
 * emitted with the result register as first operand and the source
 * register as second operand, and the result is written with storeValue.
 */
25 static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
29 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
30 rlResult = evalLoc(cUnit, rlDest, kFPReg, true);
31 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
32 storeValue(cUnit, rlDest, rlResult);
/*
 * Negate the wide (double) value described by rlSrc and store it to rlDest.
 * The source pair is loaded into FP registers, and kThumb2Vnegd is emitted
 * on the registers formed by S2D from each low/high pair.
 */
35 static void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest,
39 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
40 rlResult = evalLoc(cUnit, rlDest, kFPReg, true);
/* S2D combines a low/high register pair into one operand (result first). */
41 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
42 S2D(rlSrc.lowReg, rlSrc.highReg));
43 storeValueWide(cUnit, rlDest, rlResult);
47 * To avoid possible conflicts, we use a lot of temps here. Note that
48 * our usage of Thumb2 instruction forms avoids the problems with register
49 * reuse for multiply instructions prior to arm6.
/*
 * Emit a 64-bit multiply of rlSrc1 by rlSrc2 and store the wide result to
 * rlDest.  Three temps are allocated up front (resLo, resHi, tmp1); the
 * result temps are patched into a RegLocation template before the store.
 */
51 static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
52 RegLocation rlSrc1, RegLocation rlSrc2)
55 int resLo = allocTemp(cUnit);
56 int resHi = allocTemp(cUnit);
57 int tmp1 = allocTemp(cUnit);
59 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
60 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
/*
 * NOTE(review): operand roles below are read from the ARM mnemonics
 * (mul, umull, mla, add) - confirm against the LIR encoding table.
 *   tmp1        = src2.lo * src1.hi
 *   resHi:resLo = src2.lo * src1.lo   (unsigned, full 64-bit product)
 *   tmp1        = src1.lo * src2.hi + tmp1
 *   resHi       = tmp1 + resHi
 */
62 newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
63 newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
64 newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
65 newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
66 freeTemp(cUnit, tmp1);
/* resLo and resHi are not freed here; they become the result location. */
68 rlResult = getReturnLocWide(cUnit); // Just as a template, will patch
69 rlResult.lowReg = resLo;
70 rlResult.highReg = resHi;
71 storeValueWide(cUnit, rlDest, rlResult);
/*
 * Emit a two-instruction 64-bit operation: firstOp is applied to the low
 * words of rlSrc1 and rlSrc2, secondOp to the high words, and the wide
 * result is stored to rlDest.
 */
74 static void genLong3Addr(CompilationUnit *cUnit, OpKind firstOp,
75 OpKind secondOp, RegLocation rlDest,
76 RegLocation rlSrc1, RegLocation rlSrc2)
79 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
80 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
81 rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
82 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
/*
 * NOTE(review): the last argument of the call below sits on a line that
 * is not present here; presumably rlSrc2.highReg - confirm.
 */
83 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
85 storeValueWide(cUnit, rlDest, rlResult);
/*
 * Create and attach the register pool for a compilation unit.
 * The pool is allocated with dvmCompilerNew and stored in cUnit->regPool.
 * Temp pool sizes come from the static coreTemps and fpTemps tables, each
 * pool is seeded through initPool, and a nullCheckedRegs bit vector is
 * allocated from cUnit->numSSARegs.
 */
88 void dvmCompilerInitializeRegAlloc(CompilationUnit *cUnit)
/* Element counts; the division assumes both tables hold int entries. */
91 int numTemps = sizeof(coreTemps)/sizeof(int);
92 int numFPTemps = sizeof(fpTemps)/sizeof(int);
93 RegisterPool *pool = dvmCompilerNew(sizeof(*pool), true);
94 cUnit->regPool = pool;
95 pool->numCoreTemps = numTemps;
/*
 * NOTE(review): the left-hand side of this allocation is on a line not
 * present here; presumably pool->coreTemps - confirm.
 */
97 dvmCompilerNew(numTemps * sizeof(*cUnit->regPool->coreTemps), true);
98 pool->numFPTemps = numFPTemps;
/* NOTE(review): as above, the target is presumably pool->FPTemps. */
100 dvmCompilerNew(numFPTemps * sizeof(*cUnit->regPool->FPTemps), true);
/* No core registers are placed in the coreRegs pool. */
101 pool->numCoreRegs = 0;
102 pool->coreRegs = NULL;
105 initPool(pool->coreTemps, coreTemps, pool->numCoreTemps);
106 initPool(pool->FPTemps, fpTemps, pool->numFPTemps);
/* The two non-temp pools are initialised with zero entries. */
107 initPool(pool->coreRegs, NULL, 0);
108 initPool(pool->FPRegs, NULL, 0);
109 pool->nullCheckedRegs =
110 dvmCompilerAllocBitVector(cUnit->numSSARegs, false);
114 * Generate a Thumb2 IT instruction, which can nullify up to
115 * four subsequent instructions based on a condition and its
116 * inverse. The condition applies to the first instruction, which
117 * is executed if the condition is met. The string "guide" consists
118 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
119 * A "T" means the instruction is executed if the condition is
120 * met, and an "E" means the instruction is executed if the condition is not met.
/*
 * Emit a kThumb2It instruction for the given condition code and guide
 * string, returning the LIR created by newLIR2.
 * condBit is the low bit of the condition code and altBit is its
 * complement; each guide character selects one of the two.
 * NOTE(review): the case labels and the initial values of mask1..mask3
 * are on lines not present here.
 */
123 static ArmLIR *genIT(CompilationUnit *cUnit, ArmConditionCode code,
127 int condBit = code & 1;
128 int altBit = condBit ^ 1;
133 //Note: case fallthroughs intentional
134 switch(strlen(guide)) {
/* A T at the given guide position selects condBit, anything else altBit. */
136 mask1 = (guide[2] == 'T') ? condBit : altBit;
138 mask2 = (guide[1] == 'T') ? condBit : altBit;
140 mask3 = (guide[0] == 'T') ? condBit : altBit;
/*
 * Pack the per-instruction bits into bits 3..1 of the mask and set one
 * additional bit at position (3 - strlen(guide)).
 */
148 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
149 (1 << (3 - strlen(guide)));
150 return newLIR2(cUnit, kThumb2It, code, mask);
153 /* Export the Dalvik PC associated with an instruction to the StackSave area */
/*
 * Emit code that stores the Dalvik PC of mir into the xtra.currentPc
 * field of the StackSaveArea addressed relative to rFP.
 * The PC value is materialised in a temp with loadConstant; the LIR from
 * that load is kept in res.
 */
154 static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
157 int offset = offsetof(StackSaveArea, xtra.currentPc);
158 int rDPC = allocTemp(cUnit);
/*
 * NOTE(review): the address is cast to int, which assumes pointers fit
 * in an int on the target - confirm.
 */
159 res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
/* The displacement is the distance from the end of the save area to the field. */
160 newLIR3(cUnit, kThumb2StrRRI8Predec, rDPC, rFP,
161 sizeof(StackSaveArea) - offset);
162 freeTemp(cUnit, rDPC);
167 * Handle simple case (thin lock) inline. If it's complicated, bail
168 * out to the heavyweight lock/unlock routines. We'll use dedicated
169 * registers here in order to be in the right position in case we
170 * need to bail to dvm[Lock/Unlock]Object(self, object)
172 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object]
173 * r1 -> object [arg1 for dvm[Lock/Unlock]Object]
174 * r2 -> initial contents of object->lock.thin, later result of strex
175 * r3 -> self->threadId
176 * r7 -> temp to hold new lock value [unlock only]
177 * r4 -> allow to be used by utilities as general temp
179 * The result of the strex is 0 if we acquire the lock.
/*
 * Generate code for a monitor enter or exit instruction.
 * When THIN_LOCKING is defined the visible code emits an inline
 * ldrex/strex sequence on object->lock.thin using fixed registers, then a
 * cbz on r2 that skips over a call made through r7 to dvmLockObject or
 * dvmUnlockObject.
 * NOTE(review): the conditionals on the local named enter that choose
 * between the paired lock and unlock statements below, and the
 * preprocessor lines that close the THIN_LOCKING region, are not present
 * here.
 */
181 static void genMonitor(CompilationUnit *cUnit, MIR *mir)
183 #if defined (THIN_LOCKING)
184 RegLocation rlSrc = getSrcLoc(cUnit, mir, 0);
/* True for OP_MONITOR_ENTER; any other opcode is handled as the exit case. */
185 bool enter = (mir->dalvikInsn.opCode == OP_MONITOR_ENTER);
189 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
190 lockAllTemps(cUnit); // Prepare for explicit register usage
191 freeTemp(cUnit, r4PC); // Free up r4 for general use
192 loadWordDisp(cUnit, rGLUE, offsetof(InterpState, self), r0); // Get self
193 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
194 loadWordDisp(cUnit, r0, offsetof(Thread, threadId), r3); // Get threadId
/* The field offset is passed divided by four. */
195 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
196 offsetof(Object, lock.thin) >> 2); // Get object->lock.thin
197 // Is lock.thin unheld on lock or held by us (==threadId) on unlock?
/* Presumably the enter path: subtract the initial thin value from r2. */
199 opRegImm(cUnit, kOpSub, r2, DVM_LOCK_INITIAL_THIN_VALUE);
/* Presumably the exit path: r7 gets the initial value, r2 has r3 subtracted. */
201 loadConstant(cUnit, r7, DVM_LOCK_INITIAL_THIN_VALUE);
202 opRegReg(cUnit, kOpSub, r2, r3);
204 // Note: start of IT block. If last sub result != clear, else strex
205 genIT(cUnit, kArmCondNe, "E");
206 newLIR0(cUnit, kThumb2Clrex);
/* Presumably the enter path: store r3 (threadId) with the status in r2. */
208 newLIR4(cUnit, kThumb2Strex, r2, r3, r1,
209 offsetof(Object, lock.thin) >> 2);
/* Presumably the exit path: store r7 (initial value) with the status in r2. */
211 newLIR4(cUnit, kThumb2Strex, r2, r7, r1,
212 offsetof(Object, lock.thin) >> 2);
214 // Note: end of IT block
/* Skip the call below when r2 is zero. */
216 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
/* r7 receives the address of the lock or unlock routine. */
219 loadConstant(cUnit, r7, (int)dvmLockObject);
221 loadConstant(cUnit, r7, (int)dvmUnlockObject);
223 genExportPC(cUnit, mir);
224 opReg(cUnit, kOpBlx, r7);
226 clobberCallRegs(cUnit);
/* Landing point for the cbz emitted above. */
229 target = newLIR0(cUnit, kArmPseudoTargetLabel);
230 target->defMask = ENCODE_ALL;
231 branch->generic.target = (LIR *)target;
/* NOTE(review): presumably the path used when THIN_LOCKING is not defined. */
233 genMonitorPortable(cUnit, mir);
238 * 64-bit 3way compare function.
243 * sub r7, op1lo, op2lo (treat as unsigned)
/*
 * Emit a three-way compare of two 64-bit values; the result is built in a
 * temp register and stored to rlDest.
 * Visible flow: the temp is preloaded with -1 and the high words are
 * compared.  branch1 (Lt) goes to target1 and keeps the -1.  branch2 (Gt)
 * goes to target2, where the temp is negated.  Otherwise the low words are
 * subtracted into the temp; branch3 (Eq) goes to target1 with that
 * difference (presumably zero), and the IT block writes -1 on Hi or 1
 * otherwise before execution reaches the negate at target2.
 * NOTE(review): this relies on the subtract updating the condition flags
 * - confirm against opRegRegReg.
 */
252 static void genCmpLong(CompilationUnit *cUnit, MIR *mir,
253 RegLocation rlDest, RegLocation rlSrc1,
256 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
259 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
260 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
261 rlTemp.lowReg = allocTemp(cUnit);
262 loadConstant(cUnit, rlTemp.lowReg, -1);
263 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
264 ArmLIR *branch1 = opCondBranch(cUnit, kArmCondLt);
265 ArmLIR *branch2 = opCondBranch(cUnit, kArmCondGt);
266 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
267 ArmLIR *branch3 = opCondBranch(cUnit, kArmCondEq);
/* Two-instruction IT block: the first runs on Hi, the second otherwise. */
269 genIT(cUnit, kArmCondHi, "E");
270 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
271 loadConstant(cUnit, rlTemp.lowReg, 1);
274 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
275 target2->defMask = -1;
276 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
278 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
279 target1->defMask = -1;
281 storeValue(cUnit, rlDest, rlTemp);
/* Resolve the forward branches; branch3 shares its target with branch1. */
283 branch1->generic.target = (LIR *)target1;
284 branch2->generic.target = (LIR *)target2;
285 branch3->generic.target = branch1->generic.target;
/*
 * Inline expansion that reads the count field of a String object.
 * The object reference (source 0 of mir) is null-checked, then a word is
 * loaded from offset gDvm.offJavaLangString_count and stored to the
 * inlined target location.
 */
288 static bool genInlinedStringLength(CompilationUnit *cUnit, MIR *mir)
290 RegLocation rlObj = getSrcLoc(cUnit, mir, 0);
291 RegLocation rlDest = inlinedTarget(cUnit, mir, false);
292 rlObj = loadValue(cUnit, rlObj, kCoreReg);
293 RegLocation rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
294 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset, NULL);
/*
 * NOTE(review): the destination register argument of this load is on a
 * line not present here; presumably rlResult.lowReg - confirm.
 */
295 loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count,
297 storeValue(cUnit, rlDest, rlResult);
/*
 * Inline expansion that fetches one character of a String object.
 * Source 0 of mir is the string reference and source 1 is the index.
 * The count, offset and value fields are loaded from the string, the index
 * is bounds-checked against the count, and an unsigned halfword is loaded
 * from the value array at (offset + index) past the array contents.
 */
301 static bool genInlinedStringCharAt(CompilationUnit *cUnit, MIR *mir)
303 int contents = offsetof(ArrayObject, contents);
304 RegLocation rlObj = getSrcLoc(cUnit, mir, 0);
305 RegLocation rlIdx = getSrcLoc(cUnit, mir, 1);
306 RegLocation rlDest = inlinedTarget(cUnit, mir, false);
307 RegLocation rlResult;
308 rlObj = loadValue(cUnit, rlObj, kCoreReg);
309 rlIdx = loadValue(cUnit, rlIdx, kCoreReg);
310 int regMax = allocTemp(cUnit);
311 int regOff = allocTemp(cUnit);
312 int regPtr = allocTemp(cUnit);
/* The label returned by the null check is reused by the bounds check. */
313 ArmLIR *pcrLabel = genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg,
315 loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_count, regMax);
316 loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_offset, regOff);
317 loadWordDisp(cUnit, rlObj.lowReg, gDvm.offJavaLangString_value, regPtr);
318 genBoundsCheck(cUnit, rlIdx.lowReg, regMax, mir->offset, pcrLabel);
319 freeTemp(cUnit, regMax);
/* regPtr now addresses the array contents; regOff becomes offset + index. */
320 opRegImm(cUnit, kOpAdd, regPtr, contents);
321 opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg);
322 rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
/* NOTE(review): the literal 1 is presumably the index scale - confirm. */
323 loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf);
324 storeValue(cUnit, rlDest, rlResult);
/*
 * Inline expansion of integer absolute value.
 * Source 0 of mir is loaded into a core register and the result is stored
 * to the inlined target location.  The sequence is branch-free: signReg is
 * the source shifted right arithmetically by 31, the result is the source
 * plus signReg, and the result is then exclusive-ored with signReg.
 */
328 static bool genInlinedAbsInt(CompilationUnit *cUnit, MIR *mir)
330 RegLocation rlSrc = getSrcLoc(cUnit, mir, 0);
331 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
332 RegLocation rlDest = inlinedTarget(cUnit, mir, false);
333 RegLocation rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
334 int signReg = allocTemp(cUnit);
336 * abs(x) = y<=x>>31, (x+y)^y.
337 * Thumb2's IT block also yields 3 instructions, but imposes
338 * scheduling constraints.
340 opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31);
341 opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
342 opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
343 storeValue(cUnit, rlDest, rlResult);
/*
 * Inline expansion of float absolute value.
 * Source 0 of mir is loaded into an FP register, kThumb2Vabss is emitted
 * with the result register first and the source register second, and the
 * result is stored to the inlined target location.
 */
347 static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
349 RegLocation rlSrc = getSrcLoc(cUnit, mir, 0);
350 RegLocation rlDest = inlinedTarget(cUnit, mir, true);
351 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
352 RegLocation rlResult = evalLoc(cUnit, rlDest, kFPReg, true);
353 newLIR2(cUnit, kThumb2Vabss, rlResult.lowReg, rlSrc.lowReg);
354 storeValue(cUnit, rlDest, rlResult);
/*
 * Inline expansion of double absolute value.
 * The wide source (sources 0 and 1 of mir) is loaded into FP registers,
 * kThumb2Vabsd is emitted on the registers formed by S2D from each
 * low/high pair, and the wide result is stored to the inlined target.
 */
358 static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
360 RegLocation rlSrc = getSrcLocWide(cUnit, mir, 0, 1);
361 RegLocation rlDest = inlinedTargetWide(cUnit, mir, true);
362 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
363 RegLocation rlResult = evalLoc(cUnit, rlDest, kFPReg, true);
364 newLIR2(cUnit, kThumb2Vabsd, S2D(rlResult.lowReg, rlResult.highReg),
365 S2D(rlSrc.lowReg, rlSrc.highReg));
366 storeValueWide(cUnit, rlDest, rlResult);
/*
 * Inline expansion of integer min or max, selected by isMin.
 * Sources 0 and 1 of mir are compared, then a two-instruction IT block
 * moves one of them into the result: the second source when the condition
 * holds, the first source otherwise.  The condition is Gt for min and Lt
 * for max.
 */
370 static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
372 RegLocation rlSrc1 = getSrcLoc(cUnit, mir, 0);
373 RegLocation rlSrc2 = getSrcLoc(cUnit, mir, 1);
374 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
375 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
376 RegLocation rlDest = inlinedTarget(cUnit, mir, false);
377 RegLocation rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
378 opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
379 genIT(cUnit, (isMin) ? kArmCondGt : kArmCondLt, "E");
/* First move runs when the condition holds, second move when it does not. */
380 opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc2.lowReg);
381 opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc1.lowReg);
383 storeValue(cUnit, rlDest, rlResult);
/*
 * Inline expansion of 64-bit absolute value.
 * The wide source (sources 0 and 1 of mir) is loaded into core registers.
 * signReg is the high word shifted right arithmetically by 31; it is added
 * to the low word, added with carry to the high word, and both result
 * words are then exclusive-ored with signReg before the wide store.
 * NOTE(review): the add-with-carry relies on the preceding add updating
 * the carry flag - confirm against opRegRegReg.
 */
387 static bool genInlinedAbsLong(CompilationUnit *cUnit, MIR *mir)
389 RegLocation rlSrc = getSrcLocWide(cUnit, mir, 0, 1);
390 RegLocation rlDest = inlinedTargetWide(cUnit, mir, false);
391 rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
392 RegLocation rlResult = evalLoc(cUnit, rlDest, kCoreReg, true);
393 int signReg = allocTemp(cUnit);
395 * abs(x) = y<=x>>31, (x+y)^y.
396 * Thumb2 IT block allows slightly shorter sequence,
397 * but introduces a scheduling barrier. Stick with this
400 opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31);
401 opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
402 opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg);
403 opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
404 opRegReg(cUnit, kOpXor, rlResult.highReg, signReg);
405 storeValueWide(cUnit, rlDest, rlResult);