tcg/region.c

   1 /*
   2  * Memory region management for Tiny Code Generator for QEMU
   3  *
   4  * Copyright (c) 2008 Fabrice Bellard
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7  * of this software and associated documentation files (the "Software"), to deal
   8  * in the Software without restriction, including without limitation the rights
   9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10  * copies of the Software, and to permit persons to whom the Software is
  11  * furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included in
  14  * all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22  * THE SOFTWARE.
  23  */
  24
  25 #include "qemu/osdep.h"
  26 #include "qemu/units.h"
  27 #include "qapi/error.h"
  28 #include "exec/exec-all.h"
  29 #include "tcg/tcg.h"
  30 #if !defined(CONFIG_USER_ONLY)
  31 #include "hw/boards.h"
  32 #endif
  33 #include "tcg-internal.h"
  34
  35
/*
 * Per-region index of translation blocks.  One instance exists per region;
 * the instances are laid out back to back in region_trees, tree_size bytes
 * apart (see tcg_region_trees_init()).
 */
struct tcg_region_tree {
    /* held around every g_tree_*() call on @tree made in this file */
    QemuMutex lock;
    /* created with tb_tc_cmp(); keys are &tb->tc, values the TB itself */
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
  41
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /*
     * fields set at init time (tcg_region_init); .start is additionally
     * moved past the prologue by tcg_region_prologue_set()
     */
    void *start;         /* first usable byte of the buffer */
    void *start_aligned; /* .start rounded up to the host page size */
    void *end;           /* page-aligned buffer end, minus the last guard page */
    size_t n;            /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* index of the next region to hand out; == .n when exhausted */
    size_t agg_size_full; /* aggregate size of full regions */
};
  63
/* File-wide region bookkeeping; filled in by tcg_region_init(). */
static struct tcg_region_state region;

/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 * Both variables are set once, in tcg_region_trees_init().
 */
static void *region_trees;
static size_t tree_size;
  73
  74 /* compare a pointer @ptr and a tb_tc @s */
  75 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
  76 {
  77     if (ptr >= s->ptr + s->size) {
  78         return 1;
  79     } else if (ptr < s->ptr) {
  80         return -1;
  81     }
  82     return 0;
  83 }
  84
  85 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
  86 {
  87     const struct tb_tc *a = ap;
  88     const struct tb_tc *b = bp;
  89
  90     /*
  91      * When both sizes are set, we know this isn't a lookup.
  92      * This is the most likely case: every TB must be inserted; lookups
  93      * are a lot less frequent.
  94      */
  95     if (likely(a->size && b->size)) {
  96         if (a->ptr > b->ptr) {
  97             return 1;
  98         } else if (a->ptr < b->ptr) {
  99             return -1;
 100         }
 101         /* a->ptr == b->ptr should happen only on deletions */
 102         g_assert(a->size == b->size);
 103         return 0;
 104     }
 105     /*
 106      * All lookups have either .size field set to 0.
 107      * From the glib sources we see that @ap is always the lookup key. However
 108      * the docs provide no guarantee, so we just mark this case as likely.
 109      */
 110     if (likely(a->size == 0)) {
 111         return ptr_cmp_tb_tc(a->ptr, b);
 112     }
 113     return ptr_cmp_tb_tc(b->ptr, a);
 114 }
 115
 116 static void tcg_region_trees_init(void)
 117 {
 118     size_t i;
 119
 120     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
 121     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
 122     for (i = 0; i < region.n; i++) {
 123         struct tcg_region_tree *rt = region_trees + i * tree_size;
 124
 125         qemu_mutex_init(&rt->lock);
 126         rt->tree = g_tree_new(tb_tc_cmp);
 127     }
 128 }
 129
 130 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
 131 {
 132     size_t region_idx;
 133
 134     /*
 135      * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
 136      * a signal handler over which the caller has no control.
 137      */
 138     if (!in_code_gen_buffer(p)) {
 139         p -= tcg_splitwx_diff;
 140         if (!in_code_gen_buffer(p)) {
 141             return NULL;
 142         }
 143     }
 144
 145     if (p < region.start_aligned) {
 146         region_idx = 0;
 147     } else {
 148         ptrdiff_t offset = p - region.start_aligned;
 149
 150         if (offset > region.stride * (region.n - 1)) {
 151             region_idx = region.n - 1;
 152         } else {
 153             region_idx = offset / region.stride;
 154         }
 155     }
 156     return region_trees + region_idx * tree_size;
 157 }
 158
 159 void tcg_tb_insert(TranslationBlock *tb)
 160 {
 161     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 162
 163     g_assert(rt != NULL);
 164     qemu_mutex_lock(&rt->lock);
 165     g_tree_insert(rt->tree, &tb->tc, tb);
 166     qemu_mutex_unlock(&rt->lock);
 167 }
 168
 169 void tcg_tb_remove(TranslationBlock *tb)
 170 {
 171     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 172
 173     g_assert(rt != NULL);
 174     qemu_mutex_lock(&rt->lock);
 175     g_tree_remove(rt->tree, &tb->tc);
 176     qemu_mutex_unlock(&rt->lock);
 177 }
 178
 179 /*
 180  * Find the TB 'tb' such that
 181  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 182  * Return NULL if not found.
 183  */
 184 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
 185 {
 186     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
 187     TranslationBlock *tb;
 188     struct tb_tc s = { .ptr = (void *)tc_ptr };
 189
 190     if (rt == NULL) {
 191         return NULL;
 192     }
 193
 194     qemu_mutex_lock(&rt->lock);
 195     tb = g_tree_lookup(rt->tree, &s);
 196     qemu_mutex_unlock(&rt->lock);
 197     return tb;
 198 }
 199
 200 static void tcg_region_tree_lock_all(void)
 201 {
 202     size_t i;
 203
 204     for (i = 0; i < region.n; i++) {
 205         struct tcg_region_tree *rt = region_trees + i * tree_size;
 206
 207         qemu_mutex_lock(&rt->lock);
 208     }
 209 }
 210
 211 static void tcg_region_tree_unlock_all(void)
 212 {
 213     size_t i;
 214
 215     for (i = 0; i < region.n; i++) {
 216         struct tcg_region_tree *rt = region_trees + i * tree_size;
 217
 218         qemu_mutex_unlock(&rt->lock);
 219     }
 220 }
 221
 222 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
 223 {
 224     size_t i;
 225
 226     tcg_region_tree_lock_all();
 227     for (i = 0; i < region.n; i++) {
 228         struct tcg_region_tree *rt = region_trees + i * tree_size;
 229
 230         g_tree_foreach(rt->tree, func, user_data);
 231     }
 232     tcg_region_tree_unlock_all();
 233 }
 234
 235 size_t tcg_nb_tbs(void)
 236 {
 237     size_t nb_tbs = 0;
 238     size_t i;
 239
 240     tcg_region_tree_lock_all();
 241     for (i = 0; i < region.n; i++) {
 242         struct tcg_region_tree *rt = region_trees + i * tree_size;
 243
 244         nb_tbs += g_tree_nnodes(rt->tree);
 245     }
 246     tcg_region_tree_unlock_all();
 247     return nb_tbs;
 248 }
 249
 250 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
 251 {
 252     TranslationBlock *tb = v;
 253
 254     tb_destroy(tb);
 255     return FALSE;
 256 }
 257
 258 static void tcg_region_tree_reset_all(void)
 259 {
 260     size_t i;
 261
 262     tcg_region_tree_lock_all();
 263     for (i = 0; i < region.n; i++) {
 264         struct tcg_region_tree *rt = region_trees + i * tree_size;
 265
 266         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
 267         /* Increment the refcount first so that destroy acts as a reset */
 268         g_tree_ref(rt->tree);
 269         g_tree_destroy(rt->tree);
 270     }
 271     tcg_region_tree_unlock_all();
 272 }
 273
 274 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
 275 {
 276     void *start, *end;
 277
 278     start = region.start_aligned + curr_region * region.stride;
 279     end = start + region.size;
 280
 281     if (curr_region == 0) {
 282         start = region.start;
 283     }
 284     if (curr_region == region.n - 1) {
 285         end = region.end;
 286     }
 287
 288     *pstart = start;
 289     *pend = end;
 290 }
 291
 292 static void tcg_region_assign(TCGContext *s, size_t curr_region)
 293 {
 294     void *start, *end;
 295
 296     tcg_region_bounds(curr_region, &start, &end);
 297
 298     s->code_gen_buffer = start;
 299     s->code_gen_ptr = start;
 300     s->code_gen_buffer_size = end - start;
 301     s->code_gen_highwater = end - TCG_HIGHWATER;
 302 }
 303
 304 static bool tcg_region_alloc__locked(TCGContext *s)
 305 {
 306     if (region.current == region.n) {
 307         return true;
 308     }
 309     tcg_region_assign(s, region.current);
 310     region.current++;
 311     return false;
 312 }
 313
 314 /*
 315  * Request a new region once the one in use has filled up.
 316  * Returns true on error.
 317  */
 318 bool tcg_region_alloc(TCGContext *s)
 319 {
 320     bool err;
 321     /* read the region size now; alloc__locked will overwrite it on success */
 322     size_t size_full = s->code_gen_buffer_size;
 323
 324     qemu_mutex_lock(&region.lock);
 325     err = tcg_region_alloc__locked(s);
 326     if (!err) {
 327         region.agg_size_full += size_full - TCG_HIGHWATER;
 328     }
 329     qemu_mutex_unlock(&region.lock);
 330     return err;
 331 }
 332
 333 /*
 334  * Perform a context's first region allocation.
 335  * This function does _not_ increment region.agg_size_full.
 336  */
 337 static void tcg_region_initial_alloc__locked(TCGContext *s)
 338 {
 339     bool err = tcg_region_alloc__locked(s);
 340     g_assert(!err);
 341 }
 342
 343 void tcg_region_initial_alloc(TCGContext *s)
 344 {
 345     qemu_mutex_lock(&region.lock);
 346     tcg_region_initial_alloc__locked(s);
 347     qemu_mutex_unlock(&region.lock);
 348 }
 349
 350 /* Call from a safe-work context */
 351 void tcg_region_reset_all(void)
 352 {
 353     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
 354     unsigned int i;
 355
 356     qemu_mutex_lock(&region.lock);
 357     region.current = 0;
 358     region.agg_size_full = 0;
 359
 360     for (i = 0; i < n_ctxs; i++) {
 361         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
 362         tcg_region_initial_alloc__locked(s);
 363     }
 364     qemu_mutex_unlock(&region.lock);
 365
 366     tcg_region_tree_reset_all();
 367 }
 368
 369 #ifdef CONFIG_USER_ONLY
 370 static size_t tcg_n_regions(void)
 371 {
 372     return 1;
 373 }
 374 #else
 375 /*
 376  * It is likely that some vCPUs will translate more code than others, so we
 377  * first try to set more regions than max_cpus, with those regions being of
 378  * reasonable size. If that's not possible we make do by evenly dividing
 379  * the code_gen_buffer among the vCPUs.
 380  */
 381 static size_t tcg_n_regions(void)
 382 {
 383     size_t i;
 384
 385     /* Use a single region if all we have is one vCPU thread */
 386 #if !defined(CONFIG_USER_ONLY)
 387     MachineState *ms = MACHINE(qdev_get_machine());
 388     unsigned int max_cpus = ms->smp.max_cpus;
 389 #endif
 390     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
 391         return 1;
 392     }
 393
 394     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
 395     for (i = 8; i > 0; i--) {
 396         size_t regions_per_thread = i;
 397         size_t region_size;
 398
 399         region_size = tcg_init_ctx.code_gen_buffer_size;
 400         region_size /= max_cpus * regions_per_thread;
 401
 402         if (region_size >= 2 * 1024u * 1024) {
 403             return max_cpus * regions_per_thread;
 404         }
 405     }
 406     /* If we can't, then just allocate one region per vCPU thread */
 407     return max_cpus;
 408 }
 409 #endif
 410
/*
 * Minimum size of the code gen buffer.  This number is arbitrarily chosen,
 * but not so small that we can't have a fair number of TBs live.
 */
#define MIN_CODE_GEN_BUFFER_SIZE     (1 * MiB)

/*
 * Maximum size of the code gen buffer we'd like to use.  Unless otherwise
 * indicated, this is constrained by the range of direct branches on the
 * host cpu, as used by the TCG implementation of goto_tb.
 */
#if defined(__x86_64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE  (32 * MiB)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE  (2 * GiB)
#elif defined(__s390x__)
  /* We have a +- 4GB range on the branches; leave some slop.  */
# define MAX_CODE_GEN_BUFFER_SIZE  (3 * GiB)
#elif defined(__mips__)
  /*
   * We have a 256MB branch region, but leave room to make sure the
   * main executable is also within that region.
   */
# define MAX_CODE_GEN_BUFFER_SIZE  (128 * MiB)
#else
  /* No limit for any other host: the largest value a size_t can hold. */
# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
#endif

/*
 * Preferred default buffer size, before it is clamped against
 * MAX_CODE_GEN_BUFFER_SIZE by DEFAULT_CODE_GEN_BUFFER_SIZE below.
 */
#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

/* The smaller of the preferred default and the per-host maximum. */
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
 476
 477 static size_t size_code_gen_buffer(size_t tb_size)
 478 {
 479     /* Size the buffer.  */
 480     if (tb_size == 0) {
 481         size_t phys_mem = qemu_get_host_physmem();
 482         if (phys_mem == 0) {
 483             tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
 484         } else {
 485             tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
 486         }
 487     }
 488     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
 489         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
 490     }
 491     if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
 492         tb_size = MAX_CODE_GEN_BUFFER_SIZE;
 493     }
 494     return tb_size;
 495 }
 496
 497 #ifdef __mips__
 498 /*
 499  * In order to use J and JAL within the code_gen_buffer, we require
 500  * that the buffer not cross a 256MB boundary.
 501  */
 502 static inline bool cross_256mb(void *addr, size_t size)
 503 {
 504     return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
 505 }
 506
 507 /*
 508  * We weren't able to allocate a buffer without crossing that boundary,
 509  * so make do with the larger portion of the buffer that doesn't cross.
 510  * Returns the new base of the buffer, and adjusts code_gen_buffer_size.
 511  */
 512 static inline void *split_cross_256mb(void *buf1, size_t size1)
 513 {
 514     void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
 515     size_t size2 = buf1 + size1 - buf2;
 516
 517     size1 = buf2 - buf1;
 518     if (size1 < size2) {
 519         size1 = size2;
 520         buf1 = buf2;
 521     }
 522
 523     tcg_ctx->code_gen_buffer_size = size1;
 524     return buf1;
 525 }
 526 #endif
 527
 528 #ifdef USE_STATIC_CODE_GEN_BUFFER
 529 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
 530     __attribute__((aligned(CODE_GEN_ALIGN)));
 531
 532 static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
 533 {
 534     void *buf, *end;
 535     size_t size;
 536
 537     if (splitwx > 0) {
 538         error_setg(errp, "jit split-wx not supported");
 539         return false;
 540     }
 541
 542     /* page-align the beginning and end of the buffer */
 543     buf = static_code_gen_buffer;
 544     end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
 545     buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
 546     end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);
 547
 548     size = end - buf;
 549
 550     /* Honor a command-line option limiting the size of the buffer.  */
 551     if (size > tb_size) {
 552         size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
 553     }
 554     tcg_ctx->code_gen_buffer_size = size;
 555
 556 #ifdef __mips__
 557     if (cross_256mb(buf, size)) {
 558         buf = split_cross_256mb(buf, size);
 559         size = tcg_ctx->code_gen_buffer_size;
 560     }
 561 #endif
 562
 563     if (qemu_mprotect_rwx(buf, size)) {
 564         error_setg_errno(errp, errno, "mprotect of jit buffer");
 565         return false;
 566     }
 567     qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
 568
 569     tcg_ctx->code_gen_buffer = buf;
 570     return true;
 571 }
 572 #elif defined(_WIN32)
 573 static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
 574 {
 575     void *buf;
 576
 577     if (splitwx > 0) {
 578         error_setg(errp, "jit split-wx not supported");
 579         return false;
 580     }
 581
 582     buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
 583                              PAGE_EXECUTE_READWRITE);
 584     if (buf == NULL) {
 585         error_setg_win32(errp, GetLastError(),
 586                          "allocate %zu bytes for jit buffer", size);
 587         return false;
 588     }
 589
 590     tcg_ctx->code_gen_buffer = buf;
 591     tcg_ctx->code_gen_buffer_size = size;
 592     return true;
 593 }
 594 #else
/*
 * Allocate the code gen buffer with mmap().
 *
 * @size: number of bytes to map.
 * @prot, @flags: passed through to mmap() unchanged; the mapping is not
 *                file-backed (fd -1, offset 0).
 * @errp: where to store an error.
 *
 * On success, the mapping and its size are recorded in
 * tcg_ctx->code_gen_buffer and tcg_ctx->code_gen_buffer_size.  On mips
 * hosts the recorded size can be smaller than @size, because a mapping
 * that crosses a 256MB boundary may be trimmed to its larger piece.
 * Returns true on success, false (with @errp set) if mmap() fails.
 */
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
                                       int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        /*
         * Try again, with the original still mapped, to avoid re-acquiring
         * the same 256mb crossing.
         */
        size_t size2;
        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
        /* case 1: the second mmap succeeded; default: it failed */
        switch ((int)(buf2 != MAP_FAILED)) {
        case 1:
            if (!cross_256mb(buf2, size)) {
                /* Success!  Use the new buffer.  */
                munmap(buf, size);
                break;
            }
            /* Failure.  Work with what we had.  */
            munmap(buf2, size);
            /* fallthru */
        default:
            /* Split the original buffer.  Free the smaller half.  */
            buf2 = split_cross_256mb(buf, size);
            size2 = tcg_ctx->code_gen_buffer_size;
            if (buf == buf2) {
                /* kept the lower piece: release the tail */
                munmap(buf + size2, size - size2);
            } else {
                /* kept the upper piece: release the head */
                munmap(buf, size - size2);
            }
            size = size2;
            break;
        }
        buf = buf2;
    }
#endif

    /* Request large pages for the buffer.  */
    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = buf;
    return true;
}
 648
 649 #ifndef CONFIG_TCG_INTERPRETER
 650 #ifdef CONFIG_POSIX
 651 #include "qemu/memfd.h"
 652
/*
 * Allocate a split-wx code gen buffer backed by a memfd.
 *
 * The read-write view comes from qemu_memfd_alloc(); the same file
 * descriptor is then mapped a second time as read+execute.  On success
 * the RW view is recorded in tcg_ctx->code_gen_buffer, its size in
 * tcg_ctx->code_gen_buffer_size, and tcg_splitwx_diff is set to the
 * distance from the RW view to the RX view.
 *
 * Returns true on success.  On failure both views and the descriptor are
 * released and false is returned.  @errp is set here when the RX mapping
 * fails; when qemu_memfd_alloc() fails, presumably it has already set
 * @errp itself -- confirm against its contract.
 */
static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

#ifdef __mips__
    /* Find space for the RX mapping, vs the 256MiB regions. */
    if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
                                    MAP_PRIVATE | MAP_ANONYMOUS |
                                    MAP_NORESERVE, errp)) {
        return false;
    }
    /* The size of the mapping may have been adjusted. */
    size = tcg_ctx->code_gen_buffer_size;
    buf_rx = tcg_ctx->code_gen_buffer;
#endif

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

#ifdef __mips__
    /* Map the RX view exactly over the address range reserved above. */
    void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
                     MAP_SHARED | MAP_FIXED, fd, 0);
    if (tmp != buf_rx) {
        goto fail_rx;
    }
#else
    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }
#endif

    /* The descriptor is not used again once both views exist. */
    close(fd);
    tcg_ctx->code_gen_buffer = buf_rw;
    tcg_ctx->code_gen_buffer_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    /* Request large pages for the buffer and the splitwx.  */
    qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
    return true;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    /* Undo whatever was set up; each resource is tested individually. */
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return false;
}
 712 #endif /* CONFIG_POSIX */
 713
 714 #ifdef CONFIG_DARWIN
 715 #include <mach/mach.h>
 716
 717 extern kern_return_t mach_vm_remap(vm_map_t target_task,
 718                                    mach_vm_address_t *target_address,
 719                                    mach_vm_size_t size,
 720                                    mach_vm_offset_t mask,
 721                                    int flags,
 722                                    vm_map_t src_task,
 723                                    mach_vm_address_t src_address,
 724                                    boolean_t copy,
 725                                    vm_prot_t *cur_protection,
 726                                    vm_prot_t *max_protection,
 727                                    vm_inherit_t inheritance);
 728
 729 static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
 730 {
 731     kern_return_t ret;
 732     mach_vm_address_t buf_rw, buf_rx;
 733     vm_prot_t cur_prot, max_prot;
 734
 735     /* Map the read-write portion via normal anon memory. */
 736     if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
 737                                     MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
 738         return false;
 739     }
 740
 741     buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
 742     buf_rx = 0;
 743     ret = mach_vm_remap(mach_task_self(),
 744                         &buf_rx,
 745                         size,
 746                         0,
 747                         VM_FLAGS_ANYWHERE,
 748                         mach_task_self(),
 749                         buf_rw,
 750                         false,
 751                         &cur_prot,
 752                         &max_prot,
 753                         VM_INHERIT_NONE);
 754     if (ret != KERN_SUCCESS) {
 755         /* TODO: Convert "ret" to a human readable error message. */
 756         error_setg(errp, "vm_remap for jit splitwx failed");
 757         munmap((void *)buf_rw, size);
 758         return false;
 759     }
 760
 761     if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
 762         error_setg_errno(errp, errno, "mprotect for jit splitwx");
 763         munmap((void *)buf_rx, size);
 764         munmap((void *)buf_rw, size);
 765         return false;
 766     }
 767
 768     tcg_splitwx_diff = buf_rx - buf_rw;
 769     return true;
 770 }
 771 #endif /* CONFIG_DARWIN */
 772 #endif /* CONFIG_TCG_INTERPRETER */
 773
/*
 * Allocate a split-wx code gen buffer using whichever mechanism this
 * build provides.
 *
 * Unless CONFIG_TCG_INTERPRETER is defined, the Darwin vm_remap variant is
 * used when CONFIG_DARWIN is defined, otherwise the memfd variant when
 * CONFIG_POSIX is defined.  When both are defined, the first return wins
 * and the memfd return below it is never reached.
 * In every other configuration @errp is set and false is returned.
 */
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return false;
}
 787
/*
 * POSIX variant: allocate the code gen buffer.
 *
 * @size: number of bytes to allocate.
 * @splitwx: 0 skips split-wx entirely; any other value tries a split-wx
 *           allocation first.  If that fails, a value > 0 (force-on) makes
 *           the whole call fail, while a negative value (default-on)
 *           discards the error and falls back to a single mapping.
 * @errp: where to store an error.
 *
 * Returns true on success, false (with @errp set) on failure.
 */
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    /* makes @errp safe to inspect and free below, whatever the caller passed */
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        if (alloc_code_gen_buffer_splitwx(size, errp)) {
            return true;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return false;
        }
        /* drop the split-wx error before retrying without split-wx */
        error_free_or_abort(errp);
    }

    prot = PROT_READ | PROT_WRITE | PROT_EXEC;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_TCG_INTERPRETER
    /* The tcg interpreter does not need execute permission. */
    prot = PROT_READ | PROT_WRITE;
#elif defined(CONFIG_DARWIN)
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
 821 #endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
 822
/*
 * Initializes region partitioning.
 *
 * @tb_size: requested code gen buffer size in bytes; 0 selects a default
 *           (see size_code_gen_buffer()).
 * @splitwx: split-wx request, passed through to alloc_code_gen_buffer().
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 * Allocation failures are fatal (the error goes to &error_fatal).
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(size_t tb_size, int splitwx)
{
    void *buf, *aligned;
    size_t size;
    size_t page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    bool ok;

    ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
                               splitwx, &error_fatal);
    assert(ok);

    buf = tcg_init_ctx.code_gen_buffer;
    size = tcg_init_ctx.code_gen_buffer_size;
    page_size = qemu_real_host_page_size;
    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* the last page of each stride is that region's guard page */
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur.  Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        /* only 'end' is needed: the guard page sits right after the region */
        tcg_region_bounds(i, &start, &end);

        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE.  Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     * region.lock is not taken around this call.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}
 923
 924 void tcg_region_prologue_set(TCGContext *s)
 925 {
 926     /* Deduct the prologue from the first region.  */
 927     g_assert(region.start == s->code_gen_buffer);
 928     region.start = s->code_ptr;
 929
 930     /* Recompute boundaries of the first region. */
 931     tcg_region_assign(s, 0);
 932
 933     /* Register the balance of the buffer with gdb. */
 934     tcg_register_jit(tcg_splitwx_to_rx(region.start),
 935                      region.end - region.start);
 936 }
 937
 938 /*
 939  * Returns the size (in bytes) of all translated code (i.e. from all regions)
 940  * currently in the cache.
 941  * See also: tcg_code_capacity()
 942  * Do not confuse with tcg_current_code_size(); that one applies to a single
 943  * TCG context.
 944  */
 945 size_t tcg_code_size(void)
 946 {
 947     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
 948     unsigned int i;
 949     size_t total;
 950
 951     qemu_mutex_lock(&region.lock);
 952     total = region.agg_size_full;
 953     for (i = 0; i < n_ctxs; i++) {
 954         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
 955         size_t size;
 956
 957         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
 958         g_assert(size <= s->code_gen_buffer_size);
 959         total += size;
 960     }
 961     qemu_mutex_unlock(&region.lock);
 962     return total;
 963 }
 964
 965 /*
 966  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 967  * regions.
 968  * See also: tcg_code_size()
 969  */
 970 size_t tcg_code_capacity(void)
 971 {
 972     size_t guard_size, capacity;
 973
 974     /* no need for synchronization; these variables are set at init time */
 975     guard_size = region.stride - region.size;
 976     capacity = region.end + guard_size - region.start;
 977     capacity -= region.n * (guard_size + TCG_HIGHWATER);
 978     return capacity;
 979 }
 980
 981 size_t tcg_tb_phys_invalidate_count(void)
 982 {
 983     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
 984     unsigned int i;
 985     size_t total = 0;
 986
 987     for (i = 0; i < n_ctxs; i++) {
 988         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
 989
 990         total += qatomic_read(&s->tb_phys_invalidate_count);
 991     }
 992     return total;
 993 }