arch/arc/mm/cache_arc700.c

   1 /*
   2  * ARC700 VIPT Cache Management
   3  *
   4  * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 as
   8  * published by the Free Software Foundation.
   9  *
  10  *  vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
  11  *   -flush_cache_dup_mm (fork)
  12  *   -likewise for flush_cache_mm (exit/execve)
  13  *   -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
  14  *
  15  * vineetg: Apr 2011
   16  *  -Now that MMU can support larger pg sz (16K), the determination of
  17  *   aliasing shd not be based on assumption of 8k pg
  18  *
  19  * vineetg: Mar 2011
  20  *  -optimised version of flush_icache_range( ) for making I/D coherent
  21  *   when vaddr is available (agnostic of num of aliases)
  22  *
  23  * vineetg: Mar 2011
  24  *  -Added documentation about I-cache aliasing on ARC700 and the way it
  25  *   was handled up until MMU V2.
  26  *  -Spotted a three year old bug when killing the 4 aliases, which needs
  27  *   bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
  28  *                        instead of paddr | {0x00, 0x01, 0x10, 0x11}
  29  *   (Rajesh you owe me one now)
  30  *
  31  * vineetg: Dec 2010
  32  *  -Off-by-one error when computing num_of_lines to flush
  33  *   This broke signal handling with bionic which uses synthetic sigret stub
  34  *
  35  * vineetg: Mar 2010
  36  *  -GCC can't generate ZOL for core cache flush loops.
  37  *   Conv them into iterations based as opposed to while (start < end) types
  38  *
  39  * Vineetg: July 2009
  40  *  -In I-cache flush routine we used to chk for aliasing for every line INV.
  41  *   Instead now we setup routines per cache geometry and invoke them
  42  *   via function pointers.
  43  *
  44  * Vineetg: Jan 2009
  45  *  -Cache Line flush routines used to flush an extra line beyond end addr
  46  *   because check was while (end >= start) instead of (end > start)
  47  *     =Some call sites had to work around by doing -1, -4 etc to end param
  48  *     =Some callers didnt care. This was spec bad in case of INV routines
  49  *      which would discard valid data (cause of the horrible ext2 bug
  50  *      in ARC IDE driver)
  51  *
  52  * vineetg: June 11th 2008: Fixed flush_icache_range( )
  53  *  -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
  54  *   to be flushed, which it was not doing.
  55  *  -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
  56  *   however ARC cache maintenance OPs require PHY addr. Thus need to do
  57  *   vmalloc_to_phy.
  58  *  -Also added optimisation there, that for range > PAGE SIZE we flush the
  59  *   entire cache in one shot rather than line by line. For e.g. a module
  60  *   with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
  61  *   while cache is only 16 or 32k.
  62  */
  63
  64 #include <linux/module.h>
  65 #include <linux/mm.h>
  66 #include <linux/sched.h>
  67 #include <linux/cache.h>
  68 #include <linux/mmu_context.h>
  69 #include <linux/syscalls.h>
  70 #include <linux/uaccess.h>
  71 #include <asm/cacheflush.h>
  72 #include <asm/cachectl.h>
  73 #include <asm/setup.h>
  74
  75
  76 #ifdef CONFIG_ARC_HAS_ICACHE
  77 static void __ic_line_inv_no_alias(unsigned long, int);
  78 static void __ic_line_inv_2_alias(unsigned long, int);
  79 static void __ic_line_inv_4_alias(unsigned long, int);
  80
   81 /* Holds the ptr to flush routine, depending on size due to aliasing issues */
  82 static void (*___flush_icache_rtn) (unsigned long, int);
  83 #endif
  84
  85 /*
  86  * Read the Cache Build Confuration Registers, Decode them and save into
  87  * the cpuinfo structure for later use.
  88  * No Validation done here, simply read/convert the BCRs
  89  */
  90 void __init read_decode_cache_bcr(void)
  91 {
  92         struct bcr_cache ibcr, dbcr;
  93         struct cpuinfo_arc_cache *p_ic, *p_dc;
  94         unsigned int cpu = smp_processor_id();
  95
  96         p_ic = &cpuinfo_arc700[cpu].icache;
  97         READ_BCR(ARC_REG_IC_BCR, ibcr);
  98
  99         if (ibcr.config == 0x3)
 100                 p_ic->assoc = 2;
 101         p_ic->line_len = 8 << ibcr.line_len;
 102         p_ic->sz = 0x200 << ibcr.sz;
 103         p_ic->ver = ibcr.ver;
 104
 105         p_dc = &cpuinfo_arc700[cpu].dcache;
 106         READ_BCR(ARC_REG_DC_BCR, dbcr);
 107
 108         if (dbcr.config == 0x2)
 109                 p_dc->assoc = 4;
 110         p_dc->line_len = 16 << dbcr.line_len;
 111         p_dc->sz = 0x200 << dbcr.sz;
 112         p_dc->ver = dbcr.ver;
 113 }
 114
 115 /*
 116  * 1. Validate the Cache Geomtery (compile time config matches hardware)
 117  * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
 118  *    (aliasing D-cache configurations are not supported YET)
 119  * 3. Enable the Caches, setup default flush mode for D-Cache
 120  * 3. Calculate the SHMLBA used by user space
 121  */
 122 void __init arc_cache_init(void)
 123 {
 124         unsigned int temp;
 125 #ifdef CONFIG_ARC_CACHE
 126         unsigned int cpu = smp_processor_id();
 127 #endif
 128 #ifdef CONFIG_ARC_HAS_ICACHE
 129         struct cpuinfo_arc_cache *ic;
 130 #endif
 131 #ifdef CONFIG_ARC_HAS_DCACHE
 132         struct cpuinfo_arc_cache *dc;
 133 #endif
 134         int way_pg_ratio = way_pg_ratio;
 135
 136 #ifdef CONFIG_ARC_HAS_ICACHE
 137         ic = &cpuinfo_arc700[cpu].icache;
 138
 139         /*
 140          * if Cache way size is <= page size then no aliasing exhibited
 141          * otherwise ratio determines num of aliases.
 142          * e.g. 32K I$, 2 way set assoc, 8k pg size
 143          *       way-sz = 32k/2 = 16k
 144          *       way-pg-ratio = 16k/8k = 2, so 2 aliases possible
 145          *       (meaning 1 line could be in 2 possible locations).
 146          */
 147         way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
 148         switch (way_pg_ratio) {
 149         case 0:
 150         case 1:
 151                 ___flush_icache_rtn = __ic_line_inv_no_alias;
 152                 break;
 153         case 2:
 154                 ___flush_icache_rtn = __ic_line_inv_2_alias;
 155                 break;
 156         case 4:
 157                 ___flush_icache_rtn = __ic_line_inv_4_alias;
 158                 break;
 159         default:
 160                 panic("Unsupported I-Cache Sz\n");
 161         }
 162 #endif
 163
 164         /* Enable/disable I-Cache */
 165         temp = read_aux_reg(ARC_REG_IC_CTRL);
 166
 167 #ifdef CONFIG_ARC_HAS_ICACHE
 168         temp &= ~IC_CTRL_CACHE_DISABLE;
 169 #else
 170         temp |= IC_CTRL_CACHE_DISABLE;
 171 #endif
 172
 173         write_aux_reg(ARC_REG_IC_CTRL, temp);
 174
 175 #ifdef CONFIG_ARC_HAS_DCACHE
 176         dc = &cpuinfo_arc700[cpu].dcache;
 177
 178         /* check for D-Cache aliasing */
 179         if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
 180                 panic("D$ aliasing not handled right now\n");
 181 #endif
 182
 183         /* Set the default Invalidate Mode to "simpy discard dirty lines"
 184          *  as this is more frequent then flush before invalidate
 185          * Ofcourse we toggle this default behviour when desired
 186          */
 187         temp = read_aux_reg(ARC_REG_DC_CTRL);
 188         temp &= ~DC_CTRL_INV_MODE_FLUSH;
 189
 190 #ifdef CONFIG_ARC_HAS_DCACHE
 191         /* Enable D-Cache: Clear Bit 0 */
 192         write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
 193 #else
 194         /* Flush D cache */
 195         write_aux_reg(ARC_REG_DC_FLSH, 0x1);
 196         /* Disable D cache */
 197         write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
 198 #endif
 199
 200         return;
 201 }
 202
 203 #define OP_INV          0x1
 204 #define OP_FLUSH        0x2
 205 #define OP_FLUSH_N_INV  0x3
 206
 207 #ifdef CONFIG_ARC_HAS_DCACHE
 208
 209 /***************************************************************
 210  * Machine specific helpers for Entire D-Cache or Per Line ops
 211  */
 212
 213 static inline void wait_for_flush(void)
 214 {
 215         while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
 216                 ;
 217 }
 218
 219 /*
 220  * Operation on Entire D-Cache
 221  * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
 222  * Note that constant propagation ensures all the checks are gone
 223  * in generated code
 224  */
 225 static inline void __dc_entire_op(const int cacheop)
 226 {
 227         unsigned long flags, tmp = tmp;
 228         int aux;
 229
 230         local_irq_save(flags);
 231
 232         if (cacheop == OP_FLUSH_N_INV) {
 233                 /* Dcache provides 2 cmd: FLUSH or INV
 234                  * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
 235                  * flush-n-inv is achieved by INV cmd but with IM=1
 236                  * Default INV sub-mode is DISCARD, which needs to be toggled
 237                  */
 238                 tmp = read_aux_reg(ARC_REG_DC_CTRL);
 239                 write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 240         }
 241
 242         if (cacheop & OP_INV)   /* Inv or flush-n-inv use same cmd reg */
 243                 aux = ARC_REG_DC_IVDC;
 244         else
 245                 aux = ARC_REG_DC_FLSH;
 246
 247         write_aux_reg(aux, 0x1);
 248
 249         if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
 250                 wait_for_flush();
 251
 252         /* Switch back the DISCARD ONLY Invalidate mode */
 253         if (cacheop == OP_FLUSH_N_INV)
 254                 write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
 255
 256         local_irq_restore(flags);
 257 }
 258
 259 /*
 260  * Per Line Operation on D-Cache
 261  * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
 262  * It's sole purpose is to help gcc generate ZOL
 263  */
 264 static inline void __dc_line_loop(unsigned long start, unsigned long sz,
 265                                           int aux_reg)
 266 {
 267         int num_lines, slack;
 268
 269         /* Ensure we properly floor/ceil the non-line aligned/sized requests
 270          * and have @start - aligned to cache line and integral @num_lines.
 271          * This however can be avoided for page sized since:
 272          *  -@start will be cache-line aligned already (being page aligned)
 273          *  -@sz will be integral multiple of line size (being page sized).
 274          */
 275         if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
 276                 slack = start & ~DCACHE_LINE_MASK;
 277                 sz += slack;
 278                 start -= slack;
 279         }
 280
 281         num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
 282
 283         while (num_lines-- > 0) {
 284 #if (CONFIG_ARC_MMU_VER > 2)
 285                 /*
 286                  * Just as for I$, in MMU v3, D$ ops also require
 287                  * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
 288                  * But we pass phy addr for both. This works since Linux
 289                  * doesn't support aliasing configs for D$, yet.
 290                  * Thus paddr is enough to provide both tag and index.
 291                  */
 292                 write_aux_reg(ARC_REG_DC_PTAG, start);
 293 #endif
 294                 write_aux_reg(aux_reg, start);
 295                 start += ARC_DCACHE_LINE_LEN;
 296         }
 297 }
 298
 299 /*
 300  * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
 301  */
 302 static inline void __dc_line_op(unsigned long start, unsigned long sz,
 303                                         const int cacheop)
 304 {
 305         unsigned long flags, tmp = tmp;
 306         int aux;
 307
 308         local_irq_save(flags);
 309
 310         if (cacheop == OP_FLUSH_N_INV) {
 311                 /*
 312                  * Dcache provides 2 cmd: FLUSH or INV
 313                  * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
 314                  * flush-n-inv is achieved by INV cmd but with IM=1
 315                  * Default INV sub-mode is DISCARD, which needs to be toggled
 316                  */
 317                 tmp = read_aux_reg(ARC_REG_DC_CTRL);
 318                 write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
 319         }
 320
 321         if (cacheop & OP_INV)   /* Inv / flush-n-inv use same cmd reg */
 322                 aux = ARC_REG_DC_IVDL;
 323         else
 324                 aux = ARC_REG_DC_FLDL;
 325
 326         __dc_line_loop(start, sz, aux);
 327
 328         if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
 329                 wait_for_flush();
 330
 331         /* Switch back the DISCARD ONLY Invalidate mode */
 332         if (cacheop == OP_FLUSH_N_INV)
 333                 write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
 334
 335         local_irq_restore(flags);
 336 }
 337
 338 #else
 339
 340 #define __dc_entire_op(cacheop)
 341 #define __dc_line_op(start, sz, cacheop)
 342
 343 #endif /* CONFIG_ARC_HAS_DCACHE */
 344
 345
 346 #ifdef CONFIG_ARC_HAS_ICACHE
 347
 348 /*
 349  *              I-Cache Aliasing in ARC700 VIPT caches
 350  *
 351  * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
 352  * to "index" into SET of cache-line and paddr from MMU to match the TAG
 353  * in the WAYS of SET.
 354  *
  355  * However the CDU interface (to flush/inv) lines from software, only takes
 356  * paddr (to have simpler hardware interface). For simpler cases, using paddr
 357  * alone suffices.
 358  * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
 359  *      way_sz = cache_sz / num_ways = 16k/2 = 8k
 360  *      num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
 361  *   Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
 362  *   bits req for calc set-index = bits 12:5 (0 based). Since this range fits
 363  *   inside the bottom 13 bits of paddr, which are same for vaddr and paddr
  364  *   (with 8k pg sz), paddr alone can be safely used by CDU to unambiguously
 365  *   locate a cache-line.
 366  *
 367  * However for a difft sized cache, say 32k I$, above math yields need
 368  * for 14 bits of vaddr to locate a cache line, which can't be provided by
 369  * paddr, since the bit 13 (0 based) might differ between the two.
 370  *
 371  * This lack of extra bits needed for correct line addressing, defines the
 372  * classical problem of Cache aliasing with VIPT architectures
 373  * num_aliases = 1 << extra_bits
 374  * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
 375  *      2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
 376  *      2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
 377  *
 378  * ------------------
 379  * MMU v1/v2 (Fixed Page Size 8k)
 380  * ------------------
  381  * The solution was to provide CDU with these additional vaddr bits. These
 382  * would be bits [x:13], x would depend on cache-geom.
 383  * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
 384  * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
 385  * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
 386  * represent the offset within cache-line. The adv of using this "clumsy"
 387  * interface for additional info was no new reg was needed in CDU.
 388  *
 389  * 17:13 represented the max num of bits passable, actual bits needed were
 390  * fewer, based on the num-of-aliases possible.
 391  * -for 2 alias possibility, only bit 13 needed (32K cache)
 392  * -for 4 alias possibility, bits 14:13 needed (64K cache)
 393  *
 394  * Since vaddr was not available for all instances of I$ flush req by core
 395  * kernel, the only safe way (non-optimal though) was to kill all possible
 396  * lines which could represent an alias (even if they didnt represent one
 397  * in execution).
 398  * e.g. for 64K I$, 4 aliases possible, so we did
 399  *      flush start
 400  *      flush start | 0x01
 401  *      flush start | 0x2
 402  *      flush start | 0x3
 403  *
 404  * The penalty was invoking the operation itself, since tag match is anyways
 405  * paddr based, a line which didn't represent an alias would not match the
 406  * paddr, hence wont be killed
 407  *
 408  * Note that aliasing concerns are independent of line-sz for a given cache
 409  * geometry (size + set_assoc) because the extra bits required by line-sz are
 410  * reduced from the set calc.
 411  * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
 412  *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
 413  *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
 414  *
 415  * ------------------
 416  * MMU v3
 417  * ------------------
 418  * This ver of MMU supports var page sizes (1k-16k) - Linux will support
 419  * 8k (default), 16k and 4k.
  420  * However from hardware perspective, smaller page sizes aggravate aliasing
 421  * meaning more vaddr bits needed to disambiguate the cache-line-op ;
 422  * the existing scheme of piggybacking won't work for certain configurations.
  423  * Two new registers IC_PTAG and DC_PTAG introduced.
 424  * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
 425  */
 426
 427 /***********************************************************
 428  * Machine specific helpers for per line I-Cache invalidate.
  429  * 3 routines to account for 1, 2, 4 aliases possible
 430  */
 431
 432 static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
 433 {
 434         while (num_lines-- > 0) {
 435 #if (CONFIG_ARC_MMU_VER > 2)
 436                 write_aux_reg(ARC_REG_IC_PTAG, start);
 437 #endif
 438                 write_aux_reg(ARC_REG_IC_IVIL, start);
 439                 start += ARC_ICACHE_LINE_LEN;
 440         }
 441 }
 442
 443 static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
 444 {
 445         while (num_lines-- > 0) {
 446
 447 #if (CONFIG_ARC_MMU_VER > 2)
 448                 /*
 449                  *  MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
 450                  * reg to pass the "tag" bits and existing IVIL reg only looks
 451                  * at bits relevant for "index" (details above)
 452                  * Programming Notes:
 453                  * -when writing tag to PTAG reg, bit chopping can be avoided,
 454                  *  CDU ignores non-tag bits.
 455                  * -Ideally "index" must be computed from vaddr, but it is not
 456                  *  avail in these rtns. So to be safe, we kill the lines in all
 457                  *  possible indexes corresp to num of aliases possible for
 458                  *  given cache config.
 459                  */
 460                 write_aux_reg(ARC_REG_IC_PTAG, start);
 461                 write_aux_reg(ARC_REG_IC_IVIL,
 462                                   start & ~(0x1 << PAGE_SHIFT));
 463                 write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
 464 #else
 465                 write_aux_reg(ARC_REG_IC_IVIL, start);
 466                 write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
 467 #endif
 468                 start += ARC_ICACHE_LINE_LEN;
 469         }
 470 }
 471
 472 static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
 473 {
 474         while (num_lines-- > 0) {
 475
 476 #if (CONFIG_ARC_MMU_VER > 2)
 477                 write_aux_reg(ARC_REG_IC_PTAG, start);
 478
 479                 write_aux_reg(ARC_REG_IC_IVIL,
 480                                   start & ~(0x3 << PAGE_SHIFT));
 481                 write_aux_reg(ARC_REG_IC_IVIL,
 482                                   start & ~(0x2 << PAGE_SHIFT));
 483                 write_aux_reg(ARC_REG_IC_IVIL,
 484                                   start & ~(0x1 << PAGE_SHIFT));
 485                 write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
 486 #else
 487                 write_aux_reg(ARC_REG_IC_IVIL, start);
 488                 write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
 489                 write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
 490                 write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
 491 #endif
 492                 start += ARC_ICACHE_LINE_LEN;
 493         }
 494 }
 495
 496 static void __ic_line_inv(unsigned long start, unsigned long sz)
 497 {
 498         unsigned long flags;
 499         int num_lines, slack;
 500
 501         /*
 502          * Ensure we properly floor/ceil the non-line aligned/sized requests
 503          * and have @start - aligned to cache line, and integral @num_lines
 504          * However page sized flushes can be compile time optimised.
 505          *  -@start will be cache-line aligned already (being page aligned)
 506          *  -@sz will be integral multiple of line size (being page sized).
 507          */
 508         if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
 509                 slack = start & ~ICACHE_LINE_MASK;
 510                 sz += slack;
 511                 start -= slack;
 512         }
 513
 514         num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
 515
 516         local_irq_save(flags);
 517         (*___flush_icache_rtn) (start, num_lines);
 518         local_irq_restore(flags);
 519 }
 520
 521 /* Unlike routines above, having vaddr for flush op (along with paddr),
 522  * prevents the need to speculatively kill the lines in multiple sets
 523  * based on ratio of way_sz : pg_sz
 524  */
 525 static void __ic_line_inv_vaddr(unsigned long phy_start,
 526                                          unsigned long vaddr, unsigned long sz)
 527 {
 528         unsigned long flags;
 529         int num_lines, slack;
 530         unsigned int addr;
 531
 532         slack = phy_start & ~ICACHE_LINE_MASK;
 533         sz += slack;
 534         phy_start -= slack;
 535         num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
 536
 537 #if (CONFIG_ARC_MMU_VER > 2)
 538         vaddr &= ~ICACHE_LINE_MASK;
 539         addr = phy_start;
 540 #else
 541         /* bits 17:13 of vaddr go as bits 4:0 of paddr */
 542         addr = phy_start | ((vaddr >> 13) & 0x1F);
 543 #endif
 544
 545         local_irq_save(flags);
 546         while (num_lines-- > 0) {
 547 #if (CONFIG_ARC_MMU_VER > 2)
 548                 /* tag comes from phy addr */
 549                 write_aux_reg(ARC_REG_IC_PTAG, addr);
 550
 551                 /* index bits come from vaddr */
 552                 write_aux_reg(ARC_REG_IC_IVIL, vaddr);
 553                 vaddr += ARC_ICACHE_LINE_LEN;
 554 #else
 555                 /* this paddr contains vaddrs bits as needed */
 556                 write_aux_reg(ARC_REG_IC_IVIL, addr);
 557 #endif
 558                 addr += ARC_ICACHE_LINE_LEN;
 559         }
 560         local_irq_restore(flags);
 561 }
 562
 563 #else
 564
 565 #define __ic_line_inv(start, sz)
 566 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 567
 568 #endif /* CONFIG_ARC_HAS_ICACHE */
 569
 570
 571 /***********************************************************
 572  * Exported APIs
 573  */
 574
 575 /* TBD: use pg_arch_1 to optimize this */
 576 void flush_dcache_page(struct page *page)
 577 {
 578         __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
 579 }
 580 EXPORT_SYMBOL(flush_dcache_page);
 581
 582
 583 void dma_cache_wback_inv(unsigned long start, unsigned long sz)
 584 {
 585         __dc_line_op(start, sz, OP_FLUSH_N_INV);
 586 }
 587 EXPORT_SYMBOL(dma_cache_wback_inv);
 588
 589 void dma_cache_inv(unsigned long start, unsigned long sz)
 590 {
 591         __dc_line_op(start, sz, OP_INV);
 592 }
 593 EXPORT_SYMBOL(dma_cache_inv);
 594
 595 void dma_cache_wback(unsigned long start, unsigned long sz)
 596 {
 597         __dc_line_op(start, sz, OP_FLUSH);
 598 }
 599 EXPORT_SYMBOL(dma_cache_wback);
 600
 601 /*
 602  * This is API for making I/D Caches consistent when modifying code
 603  * (loadable modules, kprobes,  etc)
 604  * This is called on insmod, with kernel virtual address for CODE of
 605  * the module. ARC cache maintenance ops require PHY address thus we
 606  * need to convert vmalloc addr to PHY addr
 607  */
 608 void flush_icache_range(unsigned long kstart, unsigned long kend)
 609 {
 610         unsigned int tot_sz, off, sz;
 611         unsigned long phy, pfn;
 612         unsigned long flags;
 613
 614         /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
 615
 616         /* This is not the right API for user virtual address */
 617         if (kstart < TASK_SIZE) {
 618                 BUG_ON("Flush icache range for user virtual addr space");
 619                 return;
 620         }
 621
 622         /* Shortcut for bigger flush ranges.
 623          * Here we don't care if this was kernel virtual or phy addr
 624          */
 625         tot_sz = kend - kstart;
 626         if (tot_sz > PAGE_SIZE) {
 627                 flush_cache_all();
 628                 return;
 629         }
 630
 631         /* Case: Kernel Phy addr (0x8000_0000 onwards) */
 632         if (likely(kstart > PAGE_OFFSET)) {
 633                 __ic_line_inv(kstart, kend - kstart);
 634                 __dc_line_op(kstart, kend - kstart, OP_FLUSH);
 635                 return;
 636         }
 637
 638         /*
 639          * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
 640          * (1) ARC Cache Maintenance ops only take Phy addr, hence special
 641          *     handling of kernel vaddr.
 642          *
 643          * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
 644          *     it still needs to handle  a 2 page scenario, where the range
 645          *     straddles across 2 virtual pages and hence need for loop
 646          */
 647         while (tot_sz > 0) {
 648                 off = kstart % PAGE_SIZE;
 649                 pfn = vmalloc_to_pfn((void *)kstart);
 650                 phy = (pfn << PAGE_SHIFT) + off;
 651                 sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
 652                 local_irq_save(flags);
 653                 __dc_line_op(phy, sz, OP_FLUSH);
 654                 __ic_line_inv(phy, sz);
 655                 local_irq_restore(flags);
 656                 kstart += sz;
 657                 tot_sz -= sz;
 658         }
 659 }
 660
 661 /*
 662  * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
 663  * where vaddr is also available. This allows passing both vaddr and paddr
 664  * bits to CDU for cache flush, short-circuting the current pessimistic algo
 665  * which kills all possible aliases.
 666  * An added adv of knowing that vaddr is user-vaddr avoids various checks
 667  * and handling for k-vaddr, k-paddr as done in orig ver above
 668  */
 669 void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
 670                               int len)
 671 {
 672         __ic_line_inv_vaddr(paddr, u_vaddr, len);
 673         __dc_line_op(paddr, len, OP_FLUSH);
 674 }
 675
 676 /*
 677  * XXX: This also needs to be optim using pg_arch_1
 678  * This is called when a page-cache page is about to be mapped into a
 679  * user process' address space.  It offers an opportunity for a
 680  * port to ensure d-cache/i-cache coherency if necessary.
 681  */
 682 void flush_icache_page(struct vm_area_struct *vma, struct page *page)
 683 {
 684         if (!(vma->vm_flags & VM_EXEC))
 685                 return;
 686
 687         __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
 688 }
 689
 690 void flush_icache_all(void)
 691 {
 692         unsigned long flags;
 693
 694         local_irq_save(flags);
 695
 696         write_aux_reg(ARC_REG_IC_IVIC, 1);
 697
 698         /* lr will not complete till the icache inv operation is not over */
 699         read_aux_reg(ARC_REG_IC_CTRL);
 700         local_irq_restore(flags);
 701 }
 702
 703 noinline void flush_cache_all(void)
 704 {
 705         unsigned long flags;
 706
 707         local_irq_save(flags);
 708
 709         flush_icache_all();
 710         __dc_entire_op(OP_FLUSH_N_INV);
 711
 712         local_irq_restore(flags);
 713
 714 }
 715
 716 /**********************************************************************
 717  * Explicit Cache flush request from user space via syscall
 718  * Needed for JITs which generate code on the fly
 719  */
 720 SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
 721 {
 722         /* TBD: optimize this */
 723         flush_cache_all();
 724         return 0;
 725 }