/*
 * Memory region management for Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif
#include "tcg-internal.h"
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * For lookups, either a's or b's .size field is set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
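/*
 * Allocate one lookup tree per region. Each struct tcg_region_tree is padded
 * out to a multiple of the host dcache line size, so that trees used by
 * different TCG threads do not share cache lines.
 */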
static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}
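/*
 * Map a host code pointer (in either the rw or the rx view of the buffer)
 * to the region tree that covers it. Returns NULL if @p does not point
 * into code_gen_buffer at all.
 */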
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert. The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}
void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}
void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}
/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
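/*
 * Helpers that lock/unlock every region tree. The trees are always taken
 * in ascending region order, so two callers that each need a global view
 * cannot deadlock against one another.
 */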
static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}
size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}
static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}
static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
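/*
 * Compute the [start, end) bounds of region @curr_region. The first region
 * starts at region.start rather than at the aligned base, and the last
 * region extends to region.end so that it absorbs any leftover pages.
 */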
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}
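/*
 * Hand the next unused region to context @s. Returns true if the whole
 * buffer has been exhausted. Must be called with region.lock held.
 */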
static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}
/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}
/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}
/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}
#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
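/*
 * Worked example for tcg_n_regions(): an MTTCG guest with max_cpus == 8 and a
 * 1 GiB code_gen_buffer first tries 8 regions per thread, i.e. 64 regions of
 * 1 GiB / 64 = 16 MiB each; 16 MiB >= 2 MiB, so 64 regions are used.
 */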
/*
 * Minimum size of the code gen buffer. This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 */
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)

/*
 * Maximum size of the code gen buffer we'd like to use. Unless otherwise
 * indicated, this is constrained by the range of direct branches on the
 * host cpu, as used by the TCG implementation of goto_tb.
 */
#if defined(__x86_64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__sparc__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB)
#elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB)
#elif defined(__s390x__)
  /* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3 * GiB)
#elif defined(__mips__)
  /*
   * We have a 256MB branch region, but leave room to make sure the
   * main executable is also within that region.
   */
# define MAX_CODE_GEN_BUFFER_SIZE (128 * MiB)
#else
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#endif
#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place. On these systems
 * we utilise a static code generation buffer directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * As user-mode emulation typically means running multiple instances
 * of the translator, don't go too nuts with our default code gen
 * buffer lest we make things too hard for the OS.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * We expect most system emulation to run one or two guests per host.
 * Users running large scale system emulation may want to tweak their
 * runtime setup via the tb-size control on the command line.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
static size_t size_code_gen_buffer(size_t tb_size)
{
    /* Size the buffer. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, phys_mem / 8);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }
    return tb_size;
}
#ifdef __mips__
/*
 * In order to use J and JAL within the code_gen_buffer, we require
 * that the buffer not cross a 256MB boundary.
 */
static inline bool cross_256mb(void *addr, size_t size)
{
    return ((uintptr_t)addr ^ ((uintptr_t)addr + size)) & ~0x0ffffffful;
}

/*
 * We weren't able to allocate a buffer without crossing that boundary,
 * so make do with the larger portion of the buffer that doesn't cross.
 * Returns the new base of the buffer, and adjusts code_gen_buffer_size.
 */
static inline void *split_cross_256mb(void *buf1, size_t size1)
{
    void *buf2 = (void *)(((uintptr_t)buf1 + size1) & ~0x0ffffffful);
    size_t size2 = buf1 + size1 - buf2;

    size1 = buf2 - buf1;
    if (size1 < size2) {
        size1 = size2;
        buf1 = buf2;
    }

    tcg_ctx->code_gen_buffer_size = size1;
    return buf1;
}
#endif
#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static bool alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    /* page-align the beginning and end of the buffer */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size);
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size);

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size);
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        buf = split_cross_256mb(buf, size);
        size = tcg_ctx->code_gen_buffer_size;
    }
#endif

    if (qemu_mprotect_rwx(buf, size)) {
        error_setg_errno(errp, errno, "mprotect of jit buffer");
        return false;
    }
    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = buf;
    return true;
}
#elif defined(_WIN32)
static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return false;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }

    tcg_ctx->code_gen_buffer = buf;
    tcg_ctx->code_gen_buffer_size = size;
    return true;
}
#else
static bool alloc_code_gen_buffer_anon(size_t size, int prot,
                                       int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return false;
    }
    tcg_ctx->code_gen_buffer_size = size;

#ifdef __mips__
    if (cross_256mb(buf, size)) {
        /*
         * Try again, with the original still mapped, to avoid re-acquiring
         * the same 256mb crossing.
         */
        size_t size2;
        void *buf2 = mmap(NULL, size, prot, flags, -1, 0);
        switch ((int)(buf2 != MAP_FAILED)) {
        case 1:
            if (!cross_256mb(buf2, size)) {
                /* Success! Use the new buffer. */
                munmap(buf, size);
                break;
            }
            /* Failure. Work with what we had. */
            munmap(buf2, size);
            /* fallthru */
        default:
            /* Split the original buffer. Free the smaller half. */
            buf2 = split_cross_256mb(buf, size);
            size2 = tcg_ctx->code_gen_buffer_size;
            if (buf == buf2) {
                munmap(buf + size2, size - size2);
            } else {
                munmap(buf, size - size2);
            }
            size = size2;
            break;
        }
        buf = buf2;
    }
#endif

    /* Request large pages for the buffer. */
    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

    tcg_ctx->code_gen_buffer = buf;
    return true;
}
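/*
 * Split W^X support: the same backing memory is mapped twice, once
 * read-write for the translator and once read-execute for the CPU threads.
 * tcg_splitwx_diff records the constant (rx - rw) offset used to convert
 * between the two views.
 */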
#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

static bool alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

#ifdef __mips__
    /* Find space for the RX mapping, vs the 256MiB regions. */
    if (!alloc_code_gen_buffer_anon(size, PROT_NONE,
                                    MAP_PRIVATE | MAP_ANONYMOUS |
                                    MAP_NORESERVE, errp)) {
        return false;
    }
    /* The size of the mapping may have been adjusted. */
    size = tcg_ctx->code_gen_buffer_size;
    buf_rx = tcg_ctx->code_gen_buffer;
#endif

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

#ifdef __mips__
    void *tmp = mmap(buf_rx, size, PROT_READ | PROT_EXEC,
                     MAP_SHARED | MAP_FIXED, fd, 0);
    if (tmp != buf_rx) {
        goto fail_rx;
    }
#else
    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }
#endif

    close(fd);
    tcg_ctx->code_gen_buffer = buf_rw;
    tcg_ctx->code_gen_buffer_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    /* Request large pages for the buffer and the splitwx. */
    qemu_madvise(buf_rw, size, QEMU_MADV_HUGEPAGE);
    qemu_madvise(buf_rx, size, QEMU_MADV_HUGEPAGE);
    return true;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return false;
}
#endif /* CONFIG_POSIX */
#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

static bool alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (!alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, errp)) {
        return false;
    }

    buf_rw = (mach_vm_address_t)tcg_ctx->code_gen_buffer;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* TODO: Convert "ret" to a human readable error message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return false;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return false;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return true;
}
#endif /* CONFIG_DARWIN */
#endif /* CONFIG_TCG_INTERPRETER */
static bool alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return false;
}

static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        if (alloc_code_gen_buffer_splitwx(size, errp)) {
            return true;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return false;
        }
        error_free_or_abort(errp);
    }

    prot = PROT_READ | PROT_WRITE | PROT_EXEC;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_TCG_INTERPRETER
    /* The tcg interpreter does not need execute permission. */
    prot = PROT_READ | PROT_WRITE;
#elif defined(CONFIG_DARWIN)
    /* Applicable to both iOS and macOS (Apple Silicon). */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region. Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
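/*
 * Resulting buffer layout, assuming n regions (each region is followed by a
 * guard page that is mapped PROT_NONE to catch overruns):
 *
 *  buf     aligned                                             buf + size
 *   |--pad--|---- region 0 ----|guard|-- ... --|---- region n-1 ----|guard|
 *
 * region.stride = region.size + one guard page; the first region also covers
 * the padding (and, later, the prologue), and the last region absorbs any
 * leftover pages before the final guard page.
 */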
void tcg_region_init(size_t tb_size, int splitwx)
{
    void *buf, *aligned;
    size_t size;
    size_t page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    bool ok;

    ok = alloc_code_gen_buffer(size_code_gen_buffer(tb_size),
                               splitwx, &error_fatal);
    assert(ok);

    buf = tcg_init_ctx.code_gen_buffer;
    size = tcg_init_ctx.code_gen_buffer_size;
    page_size = qemu_real_host_page_size;
    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /*
     * Set guard pages in the rw buffer, as that's the one into which
     * buffer overruns could occur. Do not set guard pages in the rx
     * buffer -- let that one use hugepages throughout.
     */
    for (i = 0; i < region.n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        /*
         * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
         * rejects a permission change from RWX -> NONE. Guard pages are
         * nice for bug detection but are not essential; ignore any failure.
         */
        (void)qemu_mprotect_none(end, page_size);
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}
void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start == s->code_gen_buffer);
    region.start = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.start),
                     region.end - region.start);
}
/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}
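/*
 * Capacity accounting note: region.end + guard_size - region.start spans the
 * whole buffer, including one guard page per region; subtracting
 * region.n * (guard_size + TCG_HIGHWATER) leaves only the bytes that can
 * actually hold translated code.
 */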
/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}