1 /*-------------------------------------------------------------------------
4 * POSTGRES free space map for quickly finding free space in relations
7 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.44 2005/04/24 03:51:49 momjian Exp $
16 * The only really interesting aspect of this code is the heuristics for
17 * deciding how much information we can afford to keep about each relation,
18 * given that we have a limited amount of workspace in shared memory.
19 * These currently work as follows:
21 * The number of distinct relations tracked is limited by a configuration
22 * variable (MaxFSMRelations). When this would be exceeded, we discard the
23 * least recently used relation. A doubly-linked list with move-to-front
24 * behavior keeps track of which relation is least recently used.
26 * For each known relation, we track the average request size given to
27 * GetPageWithFreeSpace() as well as the most recent number of pages given
28 * to RecordRelationFreeSpace(). The average request size is not directly
29 * used in this module, but we expect VACUUM to use it to filter out
30 * uninteresting amounts of space before calling RecordRelationFreeSpace().
31 * The sum of the RRFS page counts is thus the total number of "interesting"
32 * pages that we would like to track; this is called DesiredFSMPages.
34 * The number of pages actually tracked is limited by a configuration variable
35 * (MaxFSMPages). When this is less than DesiredFSMPages, each relation
36 * gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages.
37 * We discard pages with less free space to reach this target.
39 * Actually, our space allocation is done in "chunks" of CHUNKPAGES pages,
40 * with each relation guaranteed at least one chunk. This reduces thrashing
41 * of the storage allocations when there are small changes in the RRFS page
42 * counts from one VACUUM to the next. (XXX it might also be worthwhile to
43 * impose some kind of moving-average smoothing on the RRFS page counts?)
45 * So the actual arithmetic is: for each relation compute myRequest as the
46 * number of chunks needed to hold its RRFS page count (not counting the
47 * first, guaranteed chunk); compute sumRequests as the sum of these values
48 * over all relations; then for each relation figure its target allocation
50 * 1 + round(spareChunks * myRequest / sumRequests)
51 * where spareChunks = totalChunks - numRels is the number of chunks we have
52 * a choice what to do with. We round off these numbers because truncating
53 * all of them would waste significant space. But because of roundoff, it's
54 * possible for the last few relations to get less space than they should;
55 * the target allocation must be checked against remaining available space.
57 *-------------------------------------------------------------------------
66 #include "miscadmin.h"
67 #include "storage/fd.h"
68 #include "storage/freespace.h"
69 #include "storage/itemptr.h"
70 #include "storage/lwlock.h"
71 #include "storage/shmem.h"
/* NOTE(review): sampled listing — the #define of CHUNKPAGES itself (original
 * line ~84) and the body of IndexFSMPageSetPageNum (original line ~110) are
 * elided from this view; confirm against the full source. */
74 /* Initial value for average-request moving average */
75 #define INITIAL_AVERAGE ((Size) (BLCKSZ / 32))
78 * Number of pages and bytes per allocation chunk. Indexes can squeeze 50%
79 * more pages into the same space because they don't need to remember how much
80 * free space on each page. The nominal number of pages, CHUNKPAGES, is for
81 * regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be
82 * even so that no space is wasted in the index case.
85 #define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))
86 #define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))
90 * Typedefs and macros for items in the page-storage arena. We use the
91 * existing ItemPointer and BlockId data structures, which are designed
92 * to pack well (they should be 6 and 4 bytes apiece regardless of machine
93 * alignment issues). Unfortunately we can't use the ItemPointer access
94 * macros, because they include Asserts insisting that ip_posid != 0.
96 typedef ItemPointerData FSMPageData;
97 typedef BlockIdData IndexFSMPageData;
/* Accessors: heap entries store (block number, free bytes); index entries
 * store only the block number (free space on index pages is all-or-nothing). */
99 #define FSMPageGetPageNum(ptr) \
100 BlockIdGetBlockNumber(&(ptr)->ip_blkid)
101 #define FSMPageGetSpace(ptr) \
102 ((Size) (ptr)->ip_posid)
103 #define FSMPageSetPageNum(ptr, pg) \
104 BlockIdSet(&(ptr)->ip_blkid, pg)
105 #define FSMPageSetSpace(ptr, sz) \
106 ((ptr)->ip_posid = (OffsetNumber) (sz))
107 #define IndexFSMPageGetPageNum(ptr) \
108 BlockIdGetBlockNumber(ptr)
109 #define IndexFSMPageSetPageNum(ptr, pg) \
113 * During database shutdown, we store the contents of FSM into a disk file,
114 * which is re-read during startup. This way we don't have a startup
115 * transient condition where FSM isn't really functioning.
117 * The file format is:
119 * endian constant 0x01020304 for detecting endianness problems
122 * -- for each rel, in *reverse* usage order:
128 * arena data array of storedPages FSMPageData or IndexFSMPageData
132 /* Name of FSM cache file (relative to $PGDATA) */
133 #define FSM_CACHE_FILENAME "global/pg_fsm.cache"
135 /* Fixed values in header */
136 #define FSM_CACHE_LABEL "FSM"
137 #define FSM_CACHE_ENDIAN 0x01020304
138 #define FSM_CACHE_VERSION 20030305
140 /* File header layout */
/* On-disk layouts for the shutdown cache file (see DumpFreeSpaceMap /
 * LoadFreeSpaceMap below). NOTE(review): sampled listing — the fields of
 * FsmCacheFileHeader and both closing braces are elided from this view. */
141 typedef struct FsmCacheFileHeader
147 } FsmCacheFileHeader;
149 /* Per-relation header */
150 typedef struct FsmCacheRelHeader
152 RelFileNode key; /* hash key (must be first) */
153 bool isIndex; /* if true, we store only page numbers */
154 uint32 avgRequest; /* moving average of space requests */
155 int32 lastPageCount; /* pages passed to RecordRelationFreeSpace */
156 int32 storedPages; /* # of pages stored in arena */
161 * Shared free-space-map objects
163 * The per-relation objects are indexed by a hash table, and are also members
164 * of two linked lists: one ordered by recency of usage (most recent first),
165 * and the other ordered by physical location of the associated storage in
166 * the page-info arena.
168 * Each relation owns one or more chunks of per-page storage in the "arena".
169 * The chunks for each relation are always consecutive, so that it can treat
170 * its page storage as a simple array. We further insist that its page data
171 * be ordered by block number, so that binary search is possible.
173 * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
174 * This assumes that all processes accessing the map will have the shared
175 * memory segment mapped at the same place in their address space.
/* In-shared-memory map structures. NOTE(review): sampled listing — the
 * "typedef struct { ... }" heads and closing braces of FSMHeader and
 * FSMRelation are elided from this view; only the member lists remain. */
177 typedef struct FSMHeader FSMHeader;
178 typedef struct FSMRelation FSMRelation;
180 /* Header for whole map */
183 FSMRelation *usageList; /* FSMRelations in usage-recency order */
184 FSMRelation *usageListTail; /* tail of usage-recency list */
185 FSMRelation *firstRel; /* FSMRelations in arena storage order */
186 FSMRelation *lastRel; /* tail of storage-order list */
187 int numRels; /* number of FSMRelations now in use */
188 double sumRequests; /* sum of requested chunks over all rels */
189 char *arena; /* arena for page-info storage */
190 int totalChunks; /* total size of arena, in chunks */
191 int usedChunks; /* # of chunks assigned */
192 /* NB: there are totalChunks - usedChunks free chunks at end of arena */
196 * Per-relation struct --- this is an entry in the shared hash table.
197 * The hash key is the RelFileNode value (hence, we look at the physical
198 * relation ID, not the logical ID, which is appropriate).
202 RelFileNode key; /* hash key (must be first) */
203 FSMRelation *nextUsage; /* next rel in usage-recency order */
204 FSMRelation *priorUsage; /* prior rel in usage-recency order */
205 FSMRelation *nextPhysical; /* next rel in arena-storage order */
206 FSMRelation *priorPhysical; /* prior rel in arena-storage order */
207 bool isIndex; /* if true, we store only page numbers */
208 Size avgRequest; /* moving average of space requests */
209 int lastPageCount; /* pages passed to RecordRelationFreeSpace */
210 int firstChunk; /* chunk # of my first chunk in arena */
211 int storedPages; /* # of pages stored in arena */
212 int nextPage; /* index (from 0) to start next search at */
216 int MaxFSMRelations; /* these are set by guc.c */
219 static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
220 static HTAB *FreeSpaceMapRelHash; /* points to (what used to be)
221 * FSMHeader->relHash */
/* Forward declarations for the module-internal routines; all of them assume
 * the caller already holds FreeSpaceLock (see the comment at original
 * line ~1001 below). NOTE(review): sampled listing — some continuation lines
 * of multi-line prototypes are elided. */
224 static void CheckFreeSpaceMapStatistics(int elevel, int numRels,
226 static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
227 static FSMRelation *create_fsm_rel(RelFileNode *rel);
228 static void delete_fsm_rel(FSMRelation *fsmrel);
229 static int realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex);
230 static void link_fsm_rel_usage(FSMRelation *fsmrel);
231 static void unlink_fsm_rel_usage(FSMRelation *fsmrel);
232 static void link_fsm_rel_storage(FSMRelation *fsmrel);
233 static void unlink_fsm_rel_storage(FSMRelation *fsmrel);
234 static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
235 static BlockNumber find_index_free_space(FSMRelation *fsmrel);
236 static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
238 static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
240 static void compact_fsm_storage(void);
241 static void push_fsm_rels_after(FSMRelation *afterRel);
242 static void pack_incoming_pages(FSMPageData *newLocation, int newPages,
243 PageFreeSpaceInfo *pageSpaces, int nPages);
244 static void pack_existing_pages(FSMPageData *newLocation, int newPages,
245 FSMPageData *oldLocation, int oldPages);
246 static int fsm_calc_request(FSMRelation *fsmrel);
247 static int fsm_calc_target_allocation(int myRequest);
248 static int fsm_current_chunks(FSMRelation *fsmrel);
249 static int fsm_current_allocation(FSMRelation *fsmrel);
258 * InitFreeSpaceMap -- Initialize the freespace module.
260 * This must be called once during shared memory initialization.
261 * It builds the empty free space map table. FreeSpaceLock must also be
262 * initialized at some point, but is not touched here --- we assume there is
263 * no need for locking, since only the calling process can be accessing shared
/* Build the empty FSM in shared memory: header struct, relation hash table,
 * and the page-info arena. NOTE(review): sampled listing — the ereport(FATAL,
 * heads, local declarations (HASHCTL info, bool found, int nchunks), and
 * braces are elided from this view. */
267 InitFreeSpaceMap(void)
273 /* Create table header */
274 FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header", sizeof(FSMHeader), &found);
275 if (FreeSpaceMap == NULL)
277 (errcode(ERRCODE_OUT_OF_MEMORY),
278 errmsg("insufficient shared memory for free space map")));
280 MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
282 /* Create hashtable for FSMRelations */
283 info.keysize = sizeof(RelFileNode);
284 info.entrysize = sizeof(FSMRelation);
285 info.hash = tag_hash;
287 FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash",
291 (HASH_ELEM | HASH_FUNCTION));
293 if (!FreeSpaceMapRelHash)
295 (errcode(ERRCODE_OUT_OF_MEMORY),
296 errmsg("insufficient shared memory for free space map")));
302 /* Allocate page-storage arena */
303 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
304 /* This check ensures spareChunks will be greater than zero */
305 if (nchunks <= MaxFSMRelations)
307 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
308 errmsg("max_fsm_pages must exceed max_fsm_relations * %d",
311 FreeSpaceMap->arena = (char *) ShmemAlloc(nchunks * CHUNKBYTES);
312 if (FreeSpaceMap->arena == NULL)
314 (errcode(ERRCODE_OUT_OF_MEMORY),
315 errmsg("insufficient shared memory for free space map")));
317 FreeSpaceMap->totalChunks = nchunks;
318 FreeSpaceMap->usedChunks = 0;
319 FreeSpaceMap->sumRequests = 0;
323 * Estimate amount of shmem space needed for FSM.
/* Return the estimated shared-memory footprint of the FSM: header + hash
 * table + arena, each MAXALIGN'ed. The nchunks formula must match the one
 * in InitFreeSpaceMap above. */
326 FreeSpaceShmemSize(void)
332 size = MAXALIGN(sizeof(FSMHeader));
334 /* hash table, including the FSMRelation objects */
335 size += hash_estimate_size(MaxFSMRelations + 1, sizeof(FSMRelation));
337 /* page-storage arena */
338 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
/* guard against integer overflow of nchunks * CHUNKBYTES below */
340 if (nchunks >= (INT_MAX / CHUNKBYTES))
342 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
343 errmsg("max_fsm_pages is too large")));
345 size += MAXALIGN(nchunks * CHUNKBYTES);
351 * GetPageWithFreeSpace - try to find a page in the given relation with
352 * at least the specified amount of free space.
354 * If successful, return the block number; if not, return InvalidBlockNumber.
356 * The caller must be prepared for the possibility that the returned page
357 * will turn out to have too little space available by the time the caller
358 * gets a lock on it. In that case, the caller should report the actual
359 * amount of free space available on that page and then try again (see
360 * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
361 * extend the relation.
/* Find a page in rel with >= spaceNeeded free bytes; returns the block
 * number, or InvalidBlockNumber if none known. Takes FreeSpaceLock for the
 * duration. Side effect: creates/refreshes the rel's FSM entry and updates
 * its request-size moving average. */
364 GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
367 BlockNumber freepage;
369 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
372 * We always add a rel to the hashtable when it is inquired about.
374 fsmrel = create_fsm_rel(rel);
377 * Update the moving average of space requests. This code implements
378 * an exponential moving average with an equivalent period of about 63
379 * requests. Ignore silly requests, however, to ensure that the
380 * average stays sane.
382 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
384 int cur_avg = (int) fsmrel->avgRequest;
/* EMA step: new_avg = old_avg + (sample - old_avg)/32 */
386 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
387 fsmrel->avgRequest = (Size) cur_avg;
389 freepage = find_free_space(fsmrel, spaceNeeded);
390 LWLockRelease(FreeSpaceLock);
395 * RecordAndGetPageWithFreeSpace - update info about a page and try again.
397 * We provide this combo form, instead of a separate Record operation,
398 * to save one lock and hash table lookup cycle.
/* Combined "record new free space for oldPage, then search again" operation;
 * saves one lock acquisition and hash lookup versus doing the two steps
 * separately. Same averaging logic as GetPageWithFreeSpace. */
401 RecordAndGetPageWithFreeSpace(RelFileNode *rel,
407 BlockNumber freepage;
409 /* Sanity check: ensure spaceAvail will fit into OffsetNumber */
410 AssertArg(oldSpaceAvail < BLCKSZ);
412 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
415 * We always add a rel to the hashtable when it is inquired about.
417 fsmrel = create_fsm_rel(rel);
420 fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
423 * Update the moving average of space requests, same as in
424 * GetPageWithFreeSpace.
426 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
428 int cur_avg = (int) fsmrel->avgRequest;
430 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
431 fsmrel->avgRequest = (Size) cur_avg;
434 freepage = find_free_space(fsmrel, spaceNeeded);
435 LWLockRelease(FreeSpaceLock);
440 * GetAvgFSMRequestSize - get average FSM request size for a relation.
442 * If the relation is not known to FSM, return a default value.
/* Return the rel's average request size, or INITIAL_AVERAGE when the rel is
 * not in the map. Read-only with respect to the map contents, though it
 * still takes FreeSpaceLock exclusively. */
445 GetAvgFSMRequestSize(RelFileNode *rel)
450 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
451 fsmrel = lookup_fsm_rel(rel);
453 result = fsmrel->avgRequest;
455 result = INITIAL_AVERAGE;
456 LWLockRelease(FreeSpaceLock);
461 * RecordRelationFreeSpace - record available-space info about a relation.
463 * Any pre-existing info about the relation is assumed obsolete and discarded.
465 * The given pageSpaces[] array must be sorted in order by blkno. Note that
466 * the FSM is at liberty to discard some or all of the data.
/* Replace all stored free-space info for a heap relation with the caller's
 * (sorted-by-blkno) pageSpaces array. Data is clamped to MaxFSMPages and to
 * the rel's arena allocation; excess pages are compressed away by
 * pack_incoming_pages. No-op if the rel has no FSM entry yet. */
469 RecordRelationFreeSpace(RelFileNode *rel,
471 PageFreeSpaceInfo *pageSpaces)
475 /* Limit nPages to something sane */
478 else if (nPages > MaxFSMPages)
479 nPages = MaxFSMPages;
481 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
484 * Note we don't record info about a relation unless there's already
485 * an FSM entry for it, implying someone has done GetPageWithFreeSpace
486 * for it. Inactive rels thus will not clutter the map simply by
489 fsmrel = lookup_fsm_rel(rel);
494 FSMPageData *newLocation;
496 curAlloc = realloc_fsm_rel(fsmrel, nPages, false);
497 curAllocPages = curAlloc * CHUNKPAGES;
500 * If the data fits in our current allocation, just copy it;
501 * otherwise must compress.
503 newLocation = (FSMPageData *)
504 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
505 if (nPages <= curAllocPages)
509 for (i = 0; i < nPages; i++)
511 BlockNumber page = pageSpaces[i].blkno;
512 Size avail = pageSpaces[i].avail;
514 /* Check caller provides sorted data */
515 if (i > 0 && page <= pageSpaces[i - 1].blkno)
516 elog(ERROR, "free-space data is not in page order");
517 FSMPageSetPageNum(newLocation, page);
518 FSMPageSetSpace(newLocation, avail);
521 fsmrel->storedPages = nPages;
525 pack_incoming_pages(newLocation, curAllocPages,
527 fsmrel->storedPages = curAllocPages;
530 LWLockRelease(FreeSpaceLock);
534 * GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes
/* Index analog of GetPageWithFreeSpace: index pages are recyclable
 * all-or-nothing, so no space amount is needed and no request average is
 * maintained. */
537 GetFreeIndexPage(RelFileNode *rel)
540 BlockNumber freepage;
542 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
545 * We always add a rel to the hashtable when it is inquired about.
547 fsmrel = create_fsm_rel(rel);
549 freepage = find_index_free_space(fsmrel);
550 LWLockRelease(FreeSpaceLock);
555 * RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes
/* Index analog of RecordRelationFreeSpace: stores only (sorted) page
 * numbers. "Compression" when over-allocated is simply truncation of the
 * page list. No-op if the rel has no FSM entry yet. */
558 RecordIndexFreeSpace(RelFileNode *rel,
564 /* Limit nPages to something sane */
567 else if (nPages > MaxFSMPages)
568 nPages = MaxFSMPages;
570 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
573 * Note we don't record info about a relation unless there's already
574 * an FSM entry for it, implying someone has done GetFreeIndexPage for
575 * it. Inactive rels thus will not clutter the map simply by being
578 fsmrel = lookup_fsm_rel(rel);
584 IndexFSMPageData *newLocation;
586 curAlloc = realloc_fsm_rel(fsmrel, nPages, true);
587 curAllocPages = curAlloc * INDEXCHUNKPAGES;
590 * If the data fits in our current allocation, just copy it;
591 * otherwise must compress. But compression is easy: we merely
592 * forget extra pages.
594 newLocation = (IndexFSMPageData *)
595 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
596 if (nPages > curAllocPages)
597 nPages = curAllocPages;
599 for (i = 0; i < nPages; i++)
601 BlockNumber page = pages[i];
603 /* Check caller provides sorted data */
604 if (i > 0 && page <= pages[i - 1])
605 elog(ERROR, "free-space data is not in page order");
606 IndexFSMPageSetPageNum(newLocation, page);
609 fsmrel->storedPages = nPages;
611 LWLockRelease(FreeSpaceLock);
615 * FreeSpaceMapTruncateRel - adjust for truncation of a relation.
617 * We need to delete any stored data past the new relation length, so that
618 * we don't bogusly return removed block numbers.
/* Drop stored entries at or beyond nblocks after a relation truncation, so
 * the FSM never hands out removed block numbers. Relies on entries being
 * sorted by block number: lookup_fsm_page_entry finds the first entry
 * >= nblocks and everything from there on is cut off. */
621 FreeSpaceMapTruncateRel(RelFileNode *rel, BlockNumber nblocks)
625 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
626 fsmrel = lookup_fsm_rel(rel);
631 /* Use lookup to locate first entry >= nblocks */
632 (void) lookup_fsm_page_entry(fsmrel, nblocks, &pageIndex);
633 /* Delete all such entries */
634 fsmrel->storedPages = pageIndex;
635 /* XXX should we adjust rel's lastPageCount and sumRequests? */
637 LWLockRelease(FreeSpaceLock);
641 * FreeSpaceMapForgetRel - forget all about a relation.
643 * This is called when a relation is deleted. Although we could just let
644 * the rel age out of the map, it's better to reclaim and reuse the space
/* Drop a deleted relation's FSM entry immediately rather than letting it
 * age out of the LRU list, reclaiming its arena space for other rels. */
648 FreeSpaceMapForgetRel(RelFileNode *rel)
652 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
653 fsmrel = lookup_fsm_rel(rel);
655 delete_fsm_rel(fsmrel);
656 LWLockRelease(FreeSpaceLock);
660 * FreeSpaceMapForgetDatabase - forget all relations of a database.
662 * This is called during DROP DATABASE. As above, might as well reclaim
663 * map space sooner instead of later.
/* DROP DATABASE support: walk the usage list and delete every entry whose
 * RelFileNode belongs to dbid. nextrel is captured before delete_fsm_rel
 * unlinks the current entry. */
666 FreeSpaceMapForgetDatabase(Oid dbid)
671 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
672 for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = nextrel)
674 nextrel = fsmrel->nextUsage; /* in case we delete it */
675 if (fsmrel->key.dbNode == dbid)
676 delete_fsm_rel(fsmrel);
678 LWLockRelease(FreeSpaceLock);
682 * PrintFreeSpaceMapStatistics - print statistics about FSM contents
684 * The info is sent to ereport() with the specified message level. This is
685 * intended for use during VACUUM.
/* Emit FSM usage statistics at the given elevel (used by VACUUM VERBOSE),
 * then re-check the configured limits at NOTICE and LOG levels. Stats are
 * copied out under the lock; the ereport happens after it is released. */
688 PrintFreeSpaceMapStatistics(int elevel)
696 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
697 /* Count total space used --- tedious, but seems useful */
698 for (fsmrel = FreeSpaceMap->firstRel;
700 fsmrel = fsmrel->nextPhysical)
701 storedPages += fsmrel->storedPages;
703 /* Copy other stats before dropping lock */
704 numRels = FreeSpaceMap->numRels;
705 sumRequests = FreeSpaceMap->sumRequests;
706 LWLockRelease(FreeSpaceLock);
708 /* Convert stats to actual number of page slots needed */
709 needed = (sumRequests + numRels) * CHUNKPAGES;
712 (errmsg("free space map contains %d pages in %d relations",
713 storedPages, numRels),
714 errdetail("A total of %.0f page slots are in use (including overhead).\n"
715 "%.0f page slots are required to track all free space.\n"
716 "Current limits are: %d page slots, %d relations, using %.0f KB.",
717 Min(needed, MaxFSMPages),
718 needed, MaxFSMPages, MaxFSMRelations,
719 (double) FreeSpaceShmemSize() / 1024.0)));
721 CheckFreeSpaceMapStatistics(NOTICE, numRels, needed);
722 /* Print to server logs too because it deals with a config variable. */
723 CheckFreeSpaceMapStatistics(LOG, numRels, needed);
/* Warn (at elevel) when the FSM is up against its configured limits.
 * Two independent checks: the relation count hitting max_fsm_relations,
 * and the needed page-slot count exceeding max_fsm_pages.
 * BUGFIX: the second branch's hint previously told the user to raise
 * "max_fsm_relations", but the condition and message are about
 * max_fsm_pages — the hint must name that GUC instead. */
727 CheckFreeSpaceMapStatistics(int elevel, int numRels, double needed)
729 if (numRels == MaxFSMRelations)
731 (errmsg("max_fsm_relations(%d) equals the number of relations checked",
733 errhint("You have >= %d relations.\n"
734 "Consider increasing the configuration parameter \"max_fsm_relations\".",
736 else if (needed > MaxFSMPages)
738 (errmsg("the number of page slots needed (%.0f) exceeds max_fsm_pages (%d)",
740 errhint("Consider increasing the configuration parameter \"max_fsm_pages\"\n"
741 "to a value over %.0f.", needed)));
745 * DumpFreeSpaceMap - dump contents of FSM into a disk file for later reload
747 * This is expected to be called during database shutdown, after updates to
748 * the FSM have stopped. We lock the FreeSpaceLock but that's purely pro
749 * forma --- if anyone else is still accessing FSM, there's a problem.
/* on_proc_exit callback: serialize the whole FSM to global/pg_fsm.cache at
 * shutdown so LoadFreeSpaceMap can restore it at next startup. Rels are
 * written tail-first (least recently used first) so reload order rebuilds
 * the LRU list correctly. On any write failure the partial file is
 * unlinked. NOTE(review): sampled listing — the goto-style error-exit
 * labels and some declarations/braces are elided from this view. */
752 DumpFreeSpaceMap(int code, Datum arg)
755 char cachefilename[MAXPGPATH];
756 FsmCacheFileHeader header;
759 /* Try to create file */
760 snprintf(cachefilename, sizeof(cachefilename), "%s/%s",
761 DataDir, FSM_CACHE_FILENAME);
763 unlink(cachefilename); /* in case it exists w/wrong permissions */
765 fp = AllocateFile(cachefilename, PG_BINARY_W);
768 elog(LOG, "could not write \"%s\": %m", cachefilename);
772 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
774 /* Write file header */
775 MemSet(&header, 0, sizeof(header));
776 strcpy(header.label, FSM_CACHE_LABEL);
777 header.endian = FSM_CACHE_ENDIAN;
778 header.version = FSM_CACHE_VERSION;
779 header.numRels = FreeSpaceMap->numRels;
780 if (fwrite(&header, 1, sizeof(header), fp) != sizeof(header))
783 /* For each relation, in order from least to most recently used... */
784 for (fsmrel = FreeSpaceMap->usageListTail;
786 fsmrel = fsmrel->priorUsage)
788 FsmCacheRelHeader relheader;
791 /* Write relation header */
792 MemSet(&relheader, 0, sizeof(relheader));
793 relheader.key = fsmrel->key;
794 relheader.isIndex = fsmrel->isIndex;
795 relheader.avgRequest = fsmrel->avgRequest;
796 relheader.lastPageCount = fsmrel->lastPageCount;
797 relheader.storedPages = fsmrel->storedPages;
798 if (fwrite(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader))
801 /* Write the per-page data directly from the arena */
802 nPages = fsmrel->storedPages;
/* entry size differs: indexes store BlockIds only, heaps store full items */
809 len = nPages * sizeof(IndexFSMPageData);
811 len = nPages * sizeof(FSMPageData);
813 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
814 if (fwrite(data, 1, len, fp) != len)
820 LWLockRelease(FreeSpaceLock);
824 elog(LOG, "could not write \"%s\": %m", cachefilename);
825 /* Remove busted cache file */
826 unlink(cachefilename);
832 elog(LOG, "could not write \"%s\": %m", cachefilename);
835 LWLockRelease(FreeSpaceLock);
839 /* Remove busted cache file */
840 unlink(cachefilename);
844 * LoadFreeSpaceMap - load contents of FSM from a disk file
846 * This is expected to be called during database startup, before any FSM
847 * updates begin. We lock the FreeSpaceLock but that's purely pro
848 * forma --- if anyone else is accessing FSM yet, there's a problem.
850 * Notes: no complaint is issued if no cache file is found. If the file is
851 * found, it is deleted after reading. Thus, if we crash without a clean
852 * shutdown, the next cycle of life starts with no FSM data. To do otherwise,
853 * we'd need to do significantly more validation in this routine, because of
854 * the likelihood that what is in the dump file would be out-of-date, eg
855 * there might be entries for deleted or truncated rels.
/* Startup counterpart of DumpFreeSpaceMap: read global/pg_fsm.cache,
 * validate its header and each relation record, and rebuild FSM entries via
 * create_fsm_rel/realloc_fsm_rel. Because rels were dumped in reverse usage
 * order, replaying them front-to-back reconstructs the LRU ordering, and a
 * smaller current MaxFSMRelations naturally evicts the oldest entries.
 * The cache file is unlinked afterward so a crash never replays stale data.
 * NOTE(review): sampled listing — error-exit labels, pclose/FreeFile calls,
 * and some declarations/braces are elided from this view. */
858 LoadFreeSpaceMap(void)
861 char cachefilename[MAXPGPATH];
862 FsmCacheFileHeader header;
865 /* Try to open file */
866 snprintf(cachefilename, sizeof(cachefilename), "%s/%s",
867 DataDir, FSM_CACHE_FILENAME);
869 fp = AllocateFile(cachefilename, PG_BINARY_R);
873 elog(LOG, "could not read \"%s\": %m", cachefilename);
877 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
879 /* Read and verify file header */
880 if (fread(&header, 1, sizeof(header), fp) != sizeof(header) ||
881 strcmp(header.label, FSM_CACHE_LABEL) != 0 ||
882 header.endian != FSM_CACHE_ENDIAN ||
883 header.version != FSM_CACHE_VERSION ||
886 elog(LOG, "bogus file header in \"%s\"", cachefilename);
890 /* For each relation, in order from least to most recently used... */
891 for (relno = 0; relno < header.numRels; relno++)
893 FsmCacheRelHeader relheader;
901 /* Read and verify relation header, as best we can */
902 if (fread(&relheader, 1, sizeof(relheader), fp) != sizeof(relheader) ||
903 (relheader.isIndex != false && relheader.isIndex != true) ||
904 relheader.avgRequest >= BLCKSZ ||
905 relheader.lastPageCount < 0 ||
906 relheader.storedPages < 0)
908 elog(LOG, "bogus rel header in \"%s\"", cachefilename);
912 /* Make sure lastPageCount doesn't exceed current MaxFSMPages */
913 if (relheader.lastPageCount > MaxFSMPages)
914 relheader.lastPageCount = MaxFSMPages;
916 /* Read the per-page data */
917 nPages = relheader.storedPages;
918 if (relheader.isIndex)
919 len = nPages * sizeof(IndexFSMPageData);
921 len = nPages * sizeof(FSMPageData);
922 data = (char *) palloc(len);
923 if (fread(data, 1, len, fp) != len)
925 elog(LOG, "premature EOF in \"%s\"", cachefilename);
931 * Okay, create the FSM entry and insert data into it. Since the
932 * rels were stored in reverse usage order, at the end of the loop
933 * they will be correctly usage-ordered in memory; and if
934 * MaxFSMRelations is less than it used to be, we will correctly
935 * drop the least recently used ones.
937 fsmrel = create_fsm_rel(&relheader.key);
938 fsmrel->avgRequest = relheader.avgRequest;
940 curAlloc = realloc_fsm_rel(fsmrel, relheader.lastPageCount,
942 if (relheader.isIndex)
944 IndexFSMPageData *newLocation;
946 curAllocPages = curAlloc * INDEXCHUNKPAGES;
949 * If the data fits in our current allocation, just copy it;
950 * otherwise must compress. But compression is easy: we
951 * merely forget extra pages.
953 newLocation = (IndexFSMPageData *)
954 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
955 if (nPages > curAllocPages)
956 nPages = curAllocPages;
957 memcpy(newLocation, data, nPages * sizeof(IndexFSMPageData));
958 fsmrel->storedPages = nPages;
962 FSMPageData *newLocation;
964 curAllocPages = curAlloc * CHUNKPAGES;
967 * If the data fits in our current allocation, just copy it;
968 * otherwise must compress.
970 newLocation = (FSMPageData *)
971 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
972 if (nPages <= curAllocPages)
974 memcpy(newLocation, data, nPages * sizeof(FSMPageData));
975 fsmrel->storedPages = nPages;
979 pack_existing_pages(newLocation, curAllocPages,
980 (FSMPageData *) data, nPages);
981 fsmrel->storedPages = curAllocPages;
991 LWLockRelease(FreeSpaceLock);
995 /* Remove cache file before it can become stale; see notes above */
996 unlink(cachefilename);
1001 * Internal routines. These all assume the caller holds the FreeSpaceLock.
1005 * Lookup a relation in the hash table. If not present, return NULL.
1007 * The relation's position in the LRU list is not changed.
/* Hash-table lookup only; returns NULL if absent and does NOT touch the LRU
 * list (unlike create_fsm_rel). NOTE(review): sampled listing — the
 * hash_search argument/return lines are elided from this view. */
1009 static FSMRelation *
1010 lookup_fsm_rel(RelFileNode *rel)
1012 FSMRelation *fsmrel;
1014 fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
1025 * Lookup a relation in the hash table, creating an entry if not present.
1027 * On successful lookup, the relation is moved to the front of the LRU list.
/* Find-or-create a rel's FSM entry (HASH_ENTER semantics) and move it to the
 * front of the LRU list. A brand-new entry starts with no arena storage
 * (firstChunk == -1); creating it may evict the LRU tail entry if we are at
 * MaxFSMRelations. NOTE(review): sampled listing — some braces, the
 * hash_search action argument, and ereport heads are elided. */
1029 static FSMRelation *
1030 create_fsm_rel(RelFileNode *rel)
1032 FSMRelation *fsmrel;
1035 fsmrel = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
1041 (errcode(ERRCODE_OUT_OF_MEMORY),
1042 errmsg("out of shared memory")));
1046 /* New hashtable entry, initialize it (hash_search set the key) */
1047 fsmrel->isIndex = false; /* until we learn different */
1048 fsmrel->avgRequest = INITIAL_AVERAGE;
1049 fsmrel->lastPageCount = 0;
1050 fsmrel->firstChunk = -1; /* no space allocated */
1051 fsmrel->storedPages = 0;
1052 fsmrel->nextPage = 0;
1054 /* Discard lowest-priority existing rel, if we are over limit */
1055 if (FreeSpaceMap->numRels >= MaxFSMRelations)
1056 delete_fsm_rel(FreeSpaceMap->usageListTail);
1058 /* Add new entry at front of LRU list */
1059 link_fsm_rel_usage(fsmrel);
1060 fsmrel->nextPhysical = NULL; /* not in physical-storage list */
1061 fsmrel->priorPhysical = NULL;
1062 FreeSpaceMap->numRels++;
1063 /* sumRequests is unchanged because request must be zero */
1067 /* Existing entry, move to front of LRU list */
/* priorUsage == NULL means it's already at the head; skip the relink */
1068 if (fsmrel->priorUsage != NULL)
1070 unlink_fsm_rel_usage(fsmrel);
1071 link_fsm_rel_usage(fsmrel);
1079 * Remove an existing FSMRelation entry.
/* Fully remove an entry: subtract its chunk request from sumRequests, unlink
 * it from both the LRU and storage lists, then HASH_REMOVE it. A failed
 * removal means the hash table and lists disagree — corruption. */
1082 delete_fsm_rel(FSMRelation *fsmrel)
1084 FSMRelation *result;
1086 FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
1087 unlink_fsm_rel_usage(fsmrel);
1088 unlink_fsm_rel_storage(fsmrel);
1089 FreeSpaceMap->numRels--;
1090 result = (FSMRelation *) hash_search(FreeSpaceMapRelHash,
1091 (void *) &(fsmrel->key),
1095 elog(ERROR, "FreeSpaceMap hashtable corrupted");
1099 * Reallocate space for a FSMRelation.
1101 * This is shared code for RecordRelationFreeSpace and RecordIndexFreeSpace.
1102 * The return value is the actual new allocation, in chunks.
/* Adjust a rel's arena allocation for a new page count; returns the actual
 * allocation in chunks. Existing stored entries are always discarded first.
 * Storage is physically moved (compact + re-append at the arena's end) only
 * when both the target allocation AND the immediate need exceed the current
 * allocation; otherwise the data is stored in place. */
1105 realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex)
1112 * Delete any existing entries, and update request status.
1114 fsmrel->storedPages = 0;
/* subtract the old request before mutating the fields fsm_calc_request reads */
1115 FreeSpaceMap->sumRequests -= fsm_calc_request(fsmrel);
1116 fsmrel->lastPageCount = nPages;
1117 fsmrel->isIndex = isIndex;
1118 myRequest = fsm_calc_request(fsmrel);
1119 FreeSpaceMap->sumRequests += myRequest;
1120 myAlloc = fsm_calc_target_allocation(myRequest);
1123 * Need to reallocate space if (a) my target allocation is more than
1124 * my current allocation, AND (b) my actual immediate need
1125 * (myRequest+1 chunks) is more than my current allocation. Otherwise
1126 * just store the new data in-place.
1128 curAlloc = fsm_current_allocation(fsmrel);
1129 if (myAlloc > curAlloc && (myRequest + 1) > curAlloc && nPages > 0)
1131 /* Remove entry from storage list, and compact */
1132 unlink_fsm_rel_storage(fsmrel);
1133 compact_fsm_storage();
1134 /* Reattach to end of storage list */
1135 link_fsm_rel_storage(fsmrel);
1136 /* And allocate storage */
1137 fsmrel->firstChunk = FreeSpaceMap->usedChunks;
1138 FreeSpaceMap->usedChunks += myAlloc;
1140 /* Watch out for roundoff error */
1141 if (FreeSpaceMap->usedChunks > FreeSpaceMap->totalChunks)
1143 FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
1144 curAlloc = FreeSpaceMap->totalChunks - fsmrel->firstChunk;
1151 * Link a FSMRelation into the LRU list (always at the head).
/* Push fsmrel onto the head of the doubly-linked LRU list.
 * NOTE(review): sampled listing — the elided original line 1161 is
 * presumably the `else` pairing with line 1159, so the tail pointer is set
 * only when the list was previously empty; confirm against full source. */
1154 link_fsm_rel_usage(FSMRelation *fsmrel)
1156 fsmrel->priorUsage = NULL;
1157 fsmrel->nextUsage = FreeSpaceMap->usageList;
1158 FreeSpaceMap->usageList = fsmrel;
1159 if (fsmrel->nextUsage != NULL)
1160 fsmrel->nextUsage->priorUsage = fsmrel;
1162 FreeSpaceMap->usageListTail = fsmrel;
1166 * Delink a FSMRelation from the LRU list.
/* Splice fsmrel out of the LRU list, patching head/tail pointers when it was
 * first/last. NOTE(review): sampled listing — the `else` lines pairing with
 * the two if's are elided from this view. */
1169 unlink_fsm_rel_usage(FSMRelation *fsmrel)
1171 if (fsmrel->priorUsage != NULL)
1172 fsmrel->priorUsage->nextUsage = fsmrel->nextUsage;
1174 FreeSpaceMap->usageList = fsmrel->nextUsage;
1175 if (fsmrel->nextUsage != NULL)
1176 fsmrel->nextUsage->priorUsage = fsmrel->priorUsage;
1178 FreeSpaceMap->usageListTail = fsmrel->priorUsage;
1181 * We don't bother resetting fsmrel's links, since it's about to be
1182 * deleted or relinked at the head.
1187 * Link a FSMRelation into the storage-order list (always at the tail).
1190 link_fsm_rel_storage(FSMRelation *fsmrel)
1192 fsmrel->nextPhysical = NULL;
1193 fsmrel->priorPhysical = FreeSpaceMap->lastRel;
1194 if (FreeSpaceMap->lastRel != NULL)
1195 FreeSpaceMap->lastRel->nextPhysical = fsmrel;
1197 FreeSpaceMap->firstRel = fsmrel;
1198 FreeSpaceMap->lastRel = fsmrel;
1202 * Delink a FSMRelation from the storage-order list, if it's in it.
1205 unlink_fsm_rel_storage(FSMRelation *fsmrel)
1207 if (fsmrel->priorPhysical != NULL || FreeSpaceMap->firstRel == fsmrel)
1209 if (fsmrel->priorPhysical != NULL)
1210 fsmrel->priorPhysical->nextPhysical = fsmrel->nextPhysical;
1212 FreeSpaceMap->firstRel = fsmrel->nextPhysical;
1213 if (fsmrel->nextPhysical != NULL)
1214 fsmrel->nextPhysical->priorPhysical = fsmrel->priorPhysical;
1216 FreeSpaceMap->lastRel = fsmrel->priorPhysical;
1218 /* mark as not in list, since we may not put it back immediately */
1219 fsmrel->nextPhysical = NULL;
1220 fsmrel->priorPhysical = NULL;
1221 /* Also mark it as having no storage */
1222 fsmrel->firstChunk = -1;
1223 fsmrel->storedPages = 0;
1227 * Look to see if a page with at least the specified amount of space is
1228 * available in the given FSMRelation. If so, return its page number,
1229 * and advance the nextPage counter so that the next inquiry will return
1230 * a different page if possible; also update the entry to show that the
1231 * requested space is not available anymore. Return InvalidBlockNumber
1235 find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
1238 int pagesToCheck, /* outer loop counter */
1239 pageIndex; /* current page index */
1241 if (fsmrel->isIndex)
1242 elog(ERROR, "find_free_space called for an index relation");
1243 info = (FSMPageData *)
1244 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1245 pageIndex = fsmrel->nextPage;
1246 /* Last operation may have left nextPage pointing past end */
1247 if (pageIndex >= fsmrel->storedPages)
1250 for (pagesToCheck = fsmrel->storedPages; pagesToCheck > 0; pagesToCheck--)
1252 FSMPageData *page = info + pageIndex;
1253 Size spaceAvail = FSMPageGetSpace(page);
1255 /* Check this page */
1256 if (spaceAvail >= spaceNeeded)
1259 * Found what we want --- adjust the entry, and update
1262 FSMPageSetSpace(page, spaceAvail - spaceNeeded);
1263 fsmrel->nextPage = pageIndex + 1;
1264 return FSMPageGetPageNum(page);
1266 /* Advance pageIndex, wrapping around if needed */
1267 if (++pageIndex >= fsmrel->storedPages)
1271 return InvalidBlockNumber; /* nothing found */
1275 * As above, but for index case --- we only deal in whole pages.
1278 find_index_free_space(FSMRelation *fsmrel)
1280 IndexFSMPageData *info;
1284 * If isIndex isn't set, it could be that RecordIndexFreeSpace() has
1285 * never yet been called on this relation, and we're still looking at
1286 * the default setting from create_fsm_rel(). If so, just act as
1287 * though there's no space.
1289 if (!fsmrel->isIndex)
1291 if (fsmrel->storedPages == 0)
1292 return InvalidBlockNumber;
1293 elog(ERROR, "find_index_free_space called for a non-index relation");
1297 * For indexes, there's no need for the nextPage state variable; we
1298 * just remove and return the first available page. (We could save
1299 * cycles here by returning the last page, but it seems better to
1300 * encourage re-use of lower-numbered pages.)
1302 if (fsmrel->storedPages <= 0)
1303 return InvalidBlockNumber; /* no pages available */
1304 info = (IndexFSMPageData *)
1305 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1306 result = IndexFSMPageGetPageNum(info);
1307 fsmrel->storedPages--;
1308 memmove(info, info + 1, fsmrel->storedPages * sizeof(IndexFSMPageData));
1313 * fsm_record_free_space - guts of RecordFreeSpace operation (now only
1314 * provided as part of RecordAndGetPageWithFreeSpace).
1317 fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
1321 if (fsmrel->isIndex)
1322 elog(ERROR, "fsm_record_free_space called for an index relation");
1323 if (lookup_fsm_page_entry(fsmrel, page, &pageIndex))
1325 /* Found an existing entry for page; update it */
1328 info = (FSMPageData *)
1329 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1331 FSMPageSetSpace(info, spaceAvail);
1336 * No existing entry; ignore the call. We used to add the page to
1337 * the FSM --- but in practice, if the page hasn't got enough
1338 * space to satisfy the caller who's kicking it back to us, then
1339 * it's probably uninteresting to everyone else as well.
1345 * Look for an entry for a specific page (block number) in a FSMRelation.
1346 * Returns TRUE if a matching entry exists, else FALSE.
1348 * The output argument *outPageIndex is set to indicate where the entry exists
1349 * (if TRUE result) or could be inserted (if FALSE result).
1352 lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
1355 /* Check for empty relation */
1356 if (fsmrel->storedPages <= 0)
1362 /* Do binary search */
1363 if (fsmrel->isIndex)
1365 IndexFSMPageData *info;
1369 info = (IndexFSMPageData *)
1370 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1372 high = fsmrel->storedPages - 1;
1378 middle = low + (high - low) / 2;
1379 probe = IndexFSMPageGetPageNum(info + middle);
1382 *outPageIndex = middle;
1385 else if (probe < page)
1390 *outPageIndex = low;
1399 info = (FSMPageData *)
1400 (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
1402 high = fsmrel->storedPages - 1;
1408 middle = low + (high - low) / 2;
1409 probe = FSMPageGetPageNum(info + middle);
1412 *outPageIndex = middle;
1415 else if (probe < page)
1420 *outPageIndex = low;
/*
 * Re-pack the FSM storage arena, dropping data if necessary to meet the
 * current allocation target for each relation.  At conclusion, all available
 * space in the arena will be coalesced at the end.
 */
compact_fsm_storage(void)
	int			nextChunkIndex = 0;		/* front-of-arena packing cursor */
	bool		did_push = false;		/* already pushed later rels to the end? */
	FSMRelation *fsmrel;

	/* Process rels in physical storage order, packing each toward the front */
	for (fsmrel = FreeSpaceMap->firstRel;
		 fsmrel = fsmrel->nextPhysical)

		/*
		 * Calculate target allocation, make sure we don't overrun due to
		 * roundoff error
		 */
		newAlloc = fsm_calc_target_allocation(fsm_calc_request(fsmrel));
		if (newAlloc > FreeSpaceMap->totalChunks - nextChunkIndex)
			newAlloc = FreeSpaceMap->totalChunks - nextChunkIndex;
		/* Page-entry capacity differs for index vs. heap chunks */
		if (fsmrel->isIndex)
			newAllocPages = newAlloc * INDEXCHUNKPAGES;
			newAllocPages = newAlloc * CHUNKPAGES;

		/*
		 * Determine current size, current and new locations
		 */
		curChunks = fsm_current_chunks(fsmrel);
		oldChunkIndex = fsmrel->firstChunk;
		oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
		newChunkIndex = nextChunkIndex;
		newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;

		/*
		 * It's possible that we have to move data down, not up, if the
		 * allocations of previous rels expanded.  This normally means
		 * that our allocation expanded too (or at least got no worse),
		 * and ditto for later rels.  So there should be room to move all
		 * our data down without dropping any --- but we might have to
		 * push down following rels to acquire the room.  We don't want to
		 * do the push more than once, so pack everything against the end
		 * of the arena if so.
		 *
		 * In corner cases where we are on the short end of a roundoff choice
		 * that we were formerly on the long end of, it's possible that we
		 * have to move down and compress our data too.  In fact, even
		 * after pushing down the following rels, there might not be as
		 * much space as we computed for this rel above --- that would
		 * imply that some following rel(s) are also on the losing end of
		 * roundoff choices.  We could handle this fairly by doing the
		 * per-rel compactions out-of-order, but that seems like way too
		 * much complexity to deal with a very infrequent corner case.
		 * Instead, we simply drop pages from the end of the current rel's
		 * data until it fits.
		 */
		if (newChunkIndex > oldChunkIndex)
			int			limitChunkIndex;

			if (newAllocPages < fsmrel->storedPages)
				/* move and compress --- just drop excess pages */
				fsmrel->storedPages = newAllocPages;
				curChunks = fsm_current_chunks(fsmrel);
			/* is there enough space? */
			if (fsmrel->nextPhysical != NULL)
				limitChunkIndex = fsmrel->nextPhysical->firstChunk;
				limitChunkIndex = FreeSpaceMap->totalChunks;
			if (newChunkIndex + curChunks > limitChunkIndex)
				/* not enough space, push down following rels (once only) */
				push_fsm_rels_after(fsmrel);
				/* now is there enough space? */
				if (fsmrel->nextPhysical != NULL)
					limitChunkIndex = fsmrel->nextPhysical->firstChunk;
					limitChunkIndex = FreeSpaceMap->totalChunks;
				if (newChunkIndex + curChunks > limitChunkIndex)
					/* uh-oh, forcibly cut the allocation to fit */
					newAlloc = limitChunkIndex - newChunkIndex;

					/*
					 * If newAlloc < 0 at this point, we are moving the
					 * rel's firstChunk into territory currently assigned
					 * to a later rel.  This is okay so long as we do not
					 * copy any data.  The rels will be back in
					 * nondecreasing firstChunk order at completion of the
					 * scan.
					 */
					if (fsmrel->isIndex)
						newAllocPages = newAlloc * INDEXCHUNKPAGES;
						newAllocPages = newAlloc * CHUNKPAGES;
					fsmrel->storedPages = newAllocPages;
					curChunks = fsm_current_chunks(fsmrel);
			/* Moving down: ranges may overlap, memmove is required */
			memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
		else if (newAllocPages < fsmrel->storedPages)

			/*
			 * Need to compress the page data.  For an index,
			 * "compression" just means dropping excess pages; otherwise
			 * we try to keep the ones with the most space.
			 */
			if (fsmrel->isIndex)
				fsmrel->storedPages = newAllocPages;
				/* may need to move data */
				if (newChunkIndex != oldChunkIndex)
					memmove(newLocation, oldLocation, newAlloc * CHUNKBYTES);
				/* Heap rel: keep the entries with the most free space */
				pack_existing_pages((FSMPageData *) newLocation,
									(FSMPageData *) oldLocation,
									fsmrel->storedPages);
				fsmrel->storedPages = newAllocPages;
		else if (newChunkIndex != oldChunkIndex)

			/*
			 * No compression needed, but must copy the data up
			 */
			memmove(newLocation, oldLocation, curChunks * CHUNKBYTES);
		fsmrel->firstChunk = newChunkIndex;
		nextChunkIndex += newAlloc;
	/* On exit, everything beyond nextChunkIndex is coalesced free space */
	Assert(nextChunkIndex <= FreeSpaceMap->totalChunks);
	FreeSpaceMap->usedChunks = nextChunkIndex;
1583 * Push all FSMRels physically after afterRel to the end of the storage arena.
1585 * We sometimes have to do this when deletion or truncation of a relation
1586 * causes the allocations of remaining rels to expand markedly. We must
1587 * temporarily push existing data down to the end so that we can move it
1588 * back up in an orderly fashion.
1591 push_fsm_rels_after(FSMRelation *afterRel)
1593 int nextChunkIndex = FreeSpaceMap->totalChunks;
1594 FSMRelation *fsmrel;
1596 FreeSpaceMap->usedChunks = FreeSpaceMap->totalChunks;
1598 for (fsmrel = FreeSpaceMap->lastRel;
1600 fsmrel = fsmrel->priorPhysical)
1608 if (fsmrel == afterRel)
1611 chunkCount = fsm_current_chunks(fsmrel);
1612 nextChunkIndex -= chunkCount;
1613 newChunkIndex = nextChunkIndex;
1614 oldChunkIndex = fsmrel->firstChunk;
1615 if (newChunkIndex < oldChunkIndex)
1617 /* we're pushing down, how can it move up? */
1618 elog(PANIC, "inconsistent entry sizes in FSM");
1620 else if (newChunkIndex > oldChunkIndex)
1622 /* need to move it */
1623 newLocation = FreeSpaceMap->arena + newChunkIndex * CHUNKBYTES;
1624 oldLocation = FreeSpaceMap->arena + oldChunkIndex * CHUNKBYTES;
1625 memmove(newLocation, oldLocation, chunkCount * CHUNKBYTES);
1626 fsmrel->firstChunk = newChunkIndex;
1629 Assert(nextChunkIndex >= 0);
/*
 * Pack a set of per-page freespace data into a smaller amount of space.
 *
 * The method is to compute a low-resolution histogram of the free space
 * amounts, then determine which histogram bin contains the break point.
 * We then keep all pages above that bin, none below it, and just enough
 * of the pages in that bin to fill the output area exactly.
 */
#define HISTOGRAM_BINS 64

pack_incoming_pages(FSMPageData *newLocation, int newPages,
					PageFreeSpaceInfo *pageSpaces, int nPages)
	int			histogram[HISTOGRAM_BINS];

	Assert(newPages < nPages);	/* else I shouldn't have been called */
	/* Build histogram */
	MemSet(histogram, 0, sizeof(histogram));
	for (i = 0; i < nPages; i++)
		Size		avail = pageSpaces[i].avail;

		/* Free space on a page can never be a whole block */
		if (avail >= BLCKSZ)
			elog(ERROR, "bogus freespace amount");
		/* Reduce to a bin number: equal-width bins of BLCKSZ/HISTOGRAM_BINS */
		avail /= (BLCKSZ / HISTOGRAM_BINS);
	/* Find the breakpoint bin: accumulate counts from the fullest bin down */
	for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
		int			sum = above + histogram[i];
	/* Thresholds bracketing the breakpoint bin chosen above */
	thresholdL = i * BLCKSZ / HISTOGRAM_BINS;	/* low bound of bp bin */
	thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
	binct = newPages - above;	/* number to take from bp bin */
	/* And copy the appropriate data */
	for (i = 0; i < nPages; i++)
		BlockNumber page = pageSpaces[i].blkno;
		Size		avail = pageSpaces[i].avail;

		/* Check caller provides sorted data */
		if (i > 0 && page <= pageSpaces[i - 1].blkno)
			elog(ERROR, "free-space data is not in page order");
		/* Save this page?  (all above bp bin, plus binct pages within it) */
		if (avail >= thresholdU ||
			(avail >= thresholdL && (--binct >= 0)))
			FSMPageSetPageNum(newLocation, page);
			FSMPageSetSpace(newLocation, avail);
	Assert(newPages == 0);	/* output area should be exactly filled */
/*
 * Pack a set of per-page freespace data into a smaller amount of space.
 *
 * This is algorithmically identical to pack_incoming_pages(), but accepts
 * a different input representation.  Also, we assume the input data has
 * previously been checked for validity (size in bounds, pages in order).
 *
 * Note: it is possible for the source and destination arrays to overlap.
 * The caller is responsible for making sure newLocation is at lower addresses
 * so that we can copy data moving forward in the arrays without problem.
 */
pack_existing_pages(FSMPageData *newLocation, int newPages,
					FSMPageData *oldLocation, int oldPages)
	int			histogram[HISTOGRAM_BINS];

	Assert(newPages < oldPages);	/* else I shouldn't have been called */
	/* Build histogram */
	MemSet(histogram, 0, sizeof(histogram));
	for (i = 0; i < oldPages; i++)
		Size		avail = FSMPageGetSpace(oldLocation + i);

		/* Shouldn't happen, but test to protect against stack clobber */
		if (avail >= BLCKSZ)
			elog(ERROR, "bogus freespace amount");
		/* Reduce to a bin number: equal-width bins of BLCKSZ/HISTOGRAM_BINS */
		avail /= (BLCKSZ / HISTOGRAM_BINS);
	/* Find the breakpoint bin: accumulate counts from the fullest bin down */
	for (i = HISTOGRAM_BINS - 1; i >= 0; i--)
		int			sum = above + histogram[i];
	/* Thresholds bracketing the breakpoint bin chosen above */
	thresholdL = i * BLCKSZ / HISTOGRAM_BINS;	/* low bound of bp bin */
	thresholdU = (i + 1) * BLCKSZ / HISTOGRAM_BINS; /* hi bound */
	binct = newPages - above;	/* number to take from bp bin */
	/* And copy the appropriate data */
	for (i = 0; i < oldPages; i++)
		BlockNumber page = FSMPageGetPageNum(oldLocation + i);
		Size		avail = FSMPageGetSpace(oldLocation + i);

		/* Save this page?  (all above bp bin, plus binct pages within it) */
		if (avail >= thresholdU ||
			(avail >= thresholdL && (--binct >= 0)))
			FSMPageSetPageNum(newLocation, page);
			FSMPageSetSpace(newLocation, avail);
	Assert(newPages == 0);	/* output area should be exactly filled */
1770 * Calculate number of chunks "requested" by a rel.
1772 * Rel's lastPageCount and isIndex settings must be up-to-date when called.
1774 * See notes at top of file for details.
1777 fsm_calc_request(FSMRelation *fsmrel)
1781 /* Convert page count to chunk count */
1782 if (fsmrel->isIndex)
1783 chunkCount = (fsmrel->lastPageCount - 1) / INDEXCHUNKPAGES + 1;
1785 chunkCount = (fsmrel->lastPageCount - 1) / CHUNKPAGES + 1;
1786 /* "Request" is anything beyond our one guaranteed chunk */
1787 if (chunkCount <= 0)
1790 return chunkCount - 1;
1794 * Calculate target allocation (number of chunks) for a rel
1796 * Parameter is the result from fsm_calc_request(). The global sumRequests
1797 * and numRels totals must be up-to-date already.
1799 * See notes at top of file for details.
1802 fsm_calc_target_allocation(int myRequest)
1807 spareChunks = FreeSpaceMap->totalChunks - FreeSpaceMap->numRels;
1808 Assert(spareChunks > 0);
1809 if (spareChunks >= FreeSpaceMap->sumRequests)
1811 /* We aren't oversubscribed, so allocate exactly the request */
1816 extra = (int) rint(spareChunks * myRequest / FreeSpaceMap->sumRequests);
1817 if (extra < 0) /* shouldn't happen, but make sure */
1824 * Calculate number of chunks actually used to store current data
1827 fsm_current_chunks(FSMRelation *fsmrel)
1831 /* Make sure storedPages==0 produces right answer */
1832 if (fsmrel->storedPages <= 0)
1834 /* Convert page count to chunk count */
1835 if (fsmrel->isIndex)
1836 chunkCount = (fsmrel->storedPages - 1) / INDEXCHUNKPAGES + 1;
1838 chunkCount = (fsmrel->storedPages - 1) / CHUNKPAGES + 1;
1843 * Calculate current actual allocation (number of chunks) for a rel
1846 fsm_current_allocation(FSMRelation *fsmrel)
1848 if (fsmrel->nextPhysical != NULL)
1849 return fsmrel->nextPhysical->firstChunk - fsmrel->firstChunk;
1850 else if (fsmrel == FreeSpaceMap->lastRel)
1851 return FreeSpaceMap->usedChunks - fsmrel->firstChunk;
1854 /* it's not in the storage-order list */
1855 Assert(fsmrel->firstChunk < 0 && fsmrel->storedPages == 0);
#ifdef FREESPACE_DEBUG
/*
 * Dump contents of freespace map for debugging.
 *
 * We assume caller holds the FreeSpaceLock, or is otherwise unconcerned
 * about other processes.
 */
	FSMRelation *fsmrel;
	FSMRelation *prevrel = NULL;	/* trails fsmrel for list cross-checks */

	/* Walk the LRU list, printing each rel's header and stored entries */
	for (fsmrel = FreeSpaceMap->usageList; fsmrel; fsmrel = fsmrel->nextUsage)
		fprintf(stderr, "Map %d: rel %u/%u/%u isIndex %d avgRequest %u lastPageCount %d nextPage %d\nMap= ",
				fsmrel->key.spcNode, fsmrel->key.dbNode, fsmrel->key.relNode,
				(int) fsmrel->isIndex, fsmrel->avgRequest,
				fsmrel->lastPageCount, fsmrel->nextPage);
		if (fsmrel->isIndex)
			/* Index rels store bare page numbers */
			IndexFSMPageData *page;

			page = (IndexFSMPageData *)
				(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
			for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
				fprintf(stderr, " %u",
						IndexFSMPageGetPageNum(page));
			/* Heap rels store page number plus available free space */
			page = (FSMPageData *)
				(FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES);
			for (nPages = 0; nPages < fsmrel->storedPages; nPages++)
				fprintf(stderr, " %u:%u",
						FSMPageGetPageNum(page),
						FSMPageGetSpace(page));
		fprintf(stderr, "\n");
		/* Cross-check list links */
		if (prevrel != fsmrel->priorUsage)
			fprintf(stderr, "DumpFreeSpace: broken list links\n");
	/* After the loop, prevrel should be the recorded list tail */
	if (prevrel != FreeSpaceMap->usageListTail)
		fprintf(stderr, "DumpFreeSpace: broken list links\n");
	/* Cross-check global counters */
	if (relNum != FreeSpaceMap->numRels)
		fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
				relNum, FreeSpaceMap->numRels);
#endif   /* FREESPACE_DEBUG */