1 /*-------------------------------------------------------------------------
4 * POSTGRES free space map for quickly finding free space in relations
7 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.14 2002/09/20 19:56:01 tgl Exp $
16 * The only really interesting aspect of this code is the heuristics for
17 * deciding how much information we can afford to keep about each relation,
18 * given that we have a limited amount of workspace in shared memory.
19 * These currently work as follows:
21 * The number of distinct relations tracked is limited by a configuration
22 * variable (MaxFSMRelations). When this would be exceeded, we discard the
23 * least frequently used relation. A previously-unknown relation is always
24 * entered into the map with useCount 1 on first reference, even if this
25 * causes an existing entry with higher useCount to be discarded. This may
26 * cause a little bit of thrashing among the bottom entries in the list,
27 * but if we didn't do it then there'd be no way for a relation not in the
28 * map to get in once the map is full. Note we allow a relation to be in the
29 * map even if no pages are currently stored for it: this allows us to track
30 * its useCount & threshold, which may eventually go high enough to give it
31 * priority for page storage.
33 * The total number of pages tracked is also set by a configuration variable
34 * (MaxFSMPages). We allocate these dynamically among the known relations,
35 * giving preference to the more-frequently-referenced relations and those
36 * with smaller tuples. This is done by a thresholding mechanism: for each
37 * relation we keep track of a current target threshold, and allow only pages
38 * with free space >= threshold to be stored in the map. The threshold starts
39 * at BLCKSZ/2 (a somewhat arbitrary choice) for a newly entered relation.
40 * On each GetFreeSpace reference, the threshold is moved towards the
41 * request size using a slow exponential moving average; this means that
42 * for heavily used relations, the threshold will approach the average
43 * freespace request size (average tuple size). Whenever we run out of
44 * storage space in the map, we double the threshold of all existing relations
45 * (but not to more than BLCKSZ, so as to prevent runaway thresholds).
46 * Infrequently used relations will thus tend to have large thresholds.
48 * XXX this thresholding mechanism is experimental; need to see how well
49 * it works in practice.
51 *-------------------------------------------------------------------------
57 #include "storage/freespace.h"
58 #include "storage/itemid.h"
59 #include "storage/lwlock.h"
60 #include "storage/shmem.h"
64 * Shared free-space-map objects
66 * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
67 * This assumes that all processes accessing the map will have the shared
68 * memory segment mapped at the same place in their address space.
/* Forward typedefs so the three shared structs can reference each other. */
70 typedef struct FSMHeader FSMHeader;
71 typedef struct FSMRelation FSMRelation;
72 typedef struct FSMChunk FSMChunk;
74 /* Header for whole map */
/*
 * NOTE(review): the "struct FSMHeader {" opener and closing brace are not
 * visible in this excerpt; only the member declarations appear below.
 */
77 HTAB *relHash; /* hashtable of FSMRelation entries */
78 FSMRelation *relList; /* FSMRelations in useCount order */
79 FSMRelation *relListTail; /* tail of FSMRelation list */
80 int numRels; /* number of FSMRelations now in use */
81 FSMChunk *freeChunks; /* linked list of currently-free chunks */
82 int numFreeChunks; /* number of free chunks */
86 * Per-relation struct --- this is an entry in the shared hash table.
87 * The hash key is the RelFileNode value (hence, we look at the physical
88 * relation ID, not the logical ID, which is appropriate).
/*
 * NOTE(review): the "struct FSMRelation {" opener, the closing brace, and the
 * continuation of the numPages comment are elided from this excerpt.
 */
92 RelFileNode key; /* hash key (must be first) */
93 FSMRelation *nextRel; /* next rel in useCount order */
94 FSMRelation *priorRel; /* prior rel in useCount order */
95 int useCount; /* use count for prioritizing rels */
96 Size threshold; /* minimum amount of free space to keep */
97 int nextPage; /* index (from 0) to start next search at */
98 int numPages; /* total number of pages we have info
100 int numChunks; /* number of FSMChunks allocated to rel */
101 FSMChunk *relChunks; /* linked list of page info chunks */
102 FSMChunk *lastChunk; /* last chunk in linked list */
106 * Info about individual pages in a relation is stored in chunks to reduce
107 * allocation overhead. Note that we allow any chunk of a relation's list
108 * to be partly full, not only the last chunk; this speeds deletion of
109 * individual page entries. When the total number of unused slots reaches
110 * CHUNKPAGES, we compact out the unused slots so that we can return a chunk
111 * to the freelist; but there's no point in doing the compaction before that.
114 #define CHUNKPAGES 32 /* each chunk can store this many pages */
/*
 * Pages within a chunk are kept sorted by block number (see
 * lookup_fsm_page_entry); pages[i] and bytes[i] are parallel arrays.
 * NOTE(review): the "struct FSMChunk {" opener and closing brace are elided.
 */
118 FSMChunk *next; /* linked-list link */
119 int numPages; /* number of pages described here */
120 BlockNumber pages[CHUNKPAGES]; /* page numbers within relation */
121 ItemLength bytes[CHUNKPAGES]; /* free space available on each
/* GUC-settable capacity limits; MaxFSMPages is declared on an elided line. */
126 int MaxFSMRelations; /* these are set by guc.c */
129 static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
/* Prototypes for internal routines; all assume FreeSpaceLock is held. */
132 static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
133 static FSMRelation *create_fsm_rel(RelFileNode *rel);
134 static void delete_fsm_rel(FSMRelation *fsmrel);
135 static void link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel);
136 static void unlink_fsm_rel(FSMRelation *fsmrel);
137 static void free_chunk_chain(FSMChunk *fchunk);
138 static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
139 static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
141 static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
142 FSMChunk **outChunk, int *outChunkRelIndex);
143 static bool insert_fsm_page_entry(FSMRelation *fsmrel,
144 BlockNumber page, Size spaceAvail,
145 FSMChunk *chunk, int chunkRelIndex);
146 static bool append_fsm_chunk(FSMRelation *fsmrel);
147 static bool push_fsm_page_entry(BlockNumber page, Size spaceAvail,
148 FSMChunk *chunk, int chunkRelIndex);
149 static void delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk,
151 static void compact_fsm_page_list(FSMRelation *fsmrel);
152 static void acquire_fsm_free_space(void);
161 * InitFreeSpaceMap -- Initialize the freespace module.
163 * This must be called once during shared memory initialization.
164 * It builds the empty free space map table. FreeSpaceLock must also be
165 * initialized at some point, but is not touched here --- we assume there is
166 * no need for locking, since only the calling process can be accessing shared
170 InitFreeSpaceMap(void)
/*
 * NOTE(review): local declarations (hash ctl info, nchunks, chunks,
 * prevchunk) and several statements are elided from this excerpt.
 */
177 /* Create table header */
178 FreeSpaceMap = (FSMHeader *) ShmemAlloc(sizeof(FSMHeader));
179 if (FreeSpaceMap == NULL)
180 elog(FATAL, "Insufficient shared memory for free space map");
181 MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
183 /* Create hashtable for FSMRelations */
184 info.keysize = sizeof(RelFileNode);
185 info.entrysize = sizeof(FSMRelation);
186 info.hash = tag_hash;
/* Initial size is a fraction of the max; dynahash grows on demand. */
188 FreeSpaceMap->relHash = ShmemInitHash("Free Space Map Hash",
189 MaxFSMRelations / 10,
192 (HASH_ELEM | HASH_FUNCTION));
194 if (!FreeSpaceMap->relHash)
195 elog(FATAL, "Insufficient shared memory for free space map");
197 /* Allocate FSMChunks and fill up the free-chunks list */
198 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
199 FreeSpaceMap->numFreeChunks = nchunks;
201 chunks = (FSMChunk *) ShmemAlloc(nchunks * sizeof(FSMChunk));
203 elog(FATAL, "Insufficient shared memory for free space map");
/* Thread the chunk array into a singly-linked freelist. */
206 while (nchunks-- > 0)
208 chunks->next = prevchunk;
212 FreeSpaceMap->freeChunks = prevchunk;
216 * Estimate amount of shmem space needed for FSM.
/* Returns the byte total; must track the allocations in InitFreeSpaceMap. */
219 FreeSpaceShmemSize(void)
225 size = MAXALIGN(sizeof(FSMHeader));
227 /* hash table, including the FSMRelation objects */
228 size += hash_estimate_size(MaxFSMRelations, sizeof(FSMRelation));
230 /* FSMChunk objects */
/* Same rounding-up formula as the allocation in InitFreeSpaceMap. */
231 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
233 size += MAXALIGN(nchunks * sizeof(FSMChunk));
239 * GetPageWithFreeSpace - try to find a page in the given relation with
240 * at least the specified amount of free space.
242 * If successful, return the block number; if not, return InvalidBlockNumber.
244 * The caller must be prepared for the possibility that the returned page
245 * will turn out to have too little space available by the time the caller
246 * gets a lock on it. In that case, the caller should report the actual
247 * amount of free space available on that page (via RecordFreeSpace) and
248 * then try again. If InvalidBlockNumber is returned, extend the relation.
251 GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
254 BlockNumber freepage;
256 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
259 * We always add a rel to the hashtable when it is inquired about.
261 fsmrel = create_fsm_rel(rel);
264 * Adjust the threshold towards the space request. This essentially
265 * implements an exponential moving average with an equivalent period
266 * of about 63 requests. Ignore silly requests, however, to ensure
267 * that the average stays in bounds.
269 * In theory, if the threshold increases here we should immediately
270 * delete any pages that fall below the new threshold. In practice it
271 * seems OK to wait until we have a need to compact space.
273 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
275 int cur_avg = (int) fsmrel->threshold;
/* Move 1/32 of the way toward the request on each call. */
277 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
278 fsmrel->threshold = (Size) cur_avg;
280 freepage = find_free_space(fsmrel, spaceNeeded);
281 LWLockRelease(FreeSpaceLock);
/* NOTE(review): the "return freepage;" line is elided from this excerpt. */
286 * RecordFreeSpace - record the amount of free space available on a page.
288 * The FSM is at liberty to forget about the page instead, if the amount of
289 * free space is too small to be interesting. The only guarantee is that
290 * a subsequent request to get a page with more than spaceAvail space free
291 * will not return this page.
294 RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
298 /* Sanity check: ensure spaceAvail will fit into ItemLength */
299 AssertArg(spaceAvail < BLCKSZ);
301 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
304 * We choose not to add rels to the hashtable unless they've been
305 * inquired about with GetPageWithFreeSpace. Also, a Record operation
306 * does not increase the useCount or change threshold, only Get does.
/* If the rel isn't in the map at all, the info is silently discarded. */
308 fsmrel = lookup_fsm_rel(rel);
310 fsm_record_free_space(fsmrel, page, spaceAvail);
311 LWLockRelease(FreeSpaceLock);
315 * RecordAndGetPageWithFreeSpace - combo form to save one lock and
316 * hash table lookup cycle.
319 RecordAndGetPageWithFreeSpace(RelFileNode *rel,
/*
 * NOTE(review): the remaining parameters (presumably oldPage, oldSpaceAvail,
 * spaceNeeded, judging by the uses below) are on elided lines.
 */
325 BlockNumber freepage;
327 /* Sanity check: ensure spaceAvail will fit into ItemLength */
328 AssertArg(oldSpaceAvail < BLCKSZ);
330 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
333 * We always add a rel to the hashtable when it is inquired about.
335 fsmrel = create_fsm_rel(rel);
338 * Adjust the threshold towards the space request, same as in
339 * GetPageWithFreeSpace.
341 * Note that we do this before storing data for oldPage, which means this
342 * isn't exactly equivalent to Record followed by Get; but it seems
343 * appropriate to adjust the threshold first.
345 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
347 int cur_avg = (int) fsmrel->threshold;
/* Same 1/32-weight exponential moving average as GetPageWithFreeSpace. */
349 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
350 fsmrel->threshold = (Size) cur_avg;
353 fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
355 freepage = find_free_space(fsmrel, spaceNeeded);
356 LWLockRelease(FreeSpaceLock);
/* NOTE(review): the "return freepage;" line is elided from this excerpt. */
361 * MultiRecordFreeSpace - record available-space info about multiple pages
362 * of a relation in one call.
364 * First, the FSM must discard any entries it has for pages >= minPage.
365 * This allows obsolete info to be discarded (for example, it is used when
366 * truncating a relation). Any entries before minPage should be kept.
368 * Second, if nPages > 0, record the page numbers and free space amounts in
369 * the given pageSpaces[] array. As with RecordFreeSpace, the FSM is at
370 * liberty to discard some of this information. The pageSpaces[] array must
371 * be sorted in order by blkno, and may not contain entries before minPage.
374 MultiRecordFreeSpace(RelFileNode *rel,
377 PageFreeSpaceInfo *pageSpaces)
/*
 * NOTE(review): the minPage/nPages parameters, local declarations, several
 * braces and loop-advance statements are on elided lines in this excerpt.
 */
382 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
383 fsmrel = lookup_fsm_rel(rel);
387 * Remove entries >= minPage
392 /* Use lookup to locate first entry >= minPage */
393 lookup_fsm_page_entry(fsmrel, minPage, &chunk, &chunkRelIndex);
394 /* Set free space to 0 for each page >= minPage */
/* Zeroed entries fall below threshold and are dropped by the compaction. */
397 int numPages = chunk->numPages;
399 for (i = chunkRelIndex; i < numPages; i++)
404 /* Now compact out the zeroed entries, along with any other junk */
405 compact_fsm_page_list(fsmrel);
408 * Add new entries, if appropriate.
410 * This can be much cheaper than a full fsm_record_free_space()
411 * call because we know we are appending to the end of the relation.
413 for (i = 0; i < nPages; i++)
415 BlockNumber page = pageSpaces[i].blkno;
416 Size avail = pageSpaces[i].avail;
419 /* Check caller provides sorted data */
420 if (i > 0 ? (page <= pageSpaces[i-1].blkno) : (page < minPage))
421 elog(ERROR, "MultiRecordFreeSpace: data not in page order");
423 /* Ignore pages too small to fit */
424 if (avail < fsmrel->threshold)
427 /* Get another chunk if needed */
428 /* We may need to loop if acquire_fsm_free_space() fails */
429 while ((chunk = fsmrel->lastChunk) == NULL ||
430 chunk->numPages >= CHUNKPAGES)
432 if (!append_fsm_chunk(fsmrel))
433 acquire_fsm_free_space();
436 /* Recheck in case threshold was raised by acquire */
437 if (avail < fsmrel->threshold)
/* Append directly to the last chunk; sortedness was verified above. */
441 chunk->pages[chunk->numPages] = page;
442 chunk->bytes[chunk->numPages] = (ItemLength) avail;
447 LWLockRelease(FreeSpaceLock);
451 * FreeSpaceMapForgetRel - forget all about a relation.
453 * This is called when a relation is deleted. Although we could just let
454 * the rel age out of the map, it's better to reclaim and reuse the space
458 FreeSpaceMapForgetRel(RelFileNode *rel)
462 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
463 fsmrel = lookup_fsm_rel(rel);
/* If the rel was never entered in the map, there is nothing to forget. */
465 delete_fsm_rel(fsmrel);
466 LWLockRelease(FreeSpaceLock);
470 * FreeSpaceMapForgetDatabase - forget all relations of a database.
472 * This is called during DROP DATABASE. As above, might as well reclaim
473 * map space sooner instead of later.
475 * XXX when we implement tablespaces, target Oid will need to be tablespace
476 * ID not database ID.
479 FreeSpaceMapForgetDatabase(Oid dbid)
484 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
485 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
487 nextrel = fsmrel->nextRel; /* in case we delete it */
488 if (fsmrel->key.tblNode == dbid)
489 delete_fsm_rel(fsmrel);
491 LWLockRelease(FreeSpaceLock);
496 * Internal routines. These all assume the caller holds the FreeSpaceLock.
500 * Lookup a relation in the hash table. If not present, return NULL.
501 * The relation's useCount is not changed.
504 lookup_fsm_rel(RelFileNode *rel)
/*
 * NOTE(review): the remaining hash_search arguments (key pointer, action —
 * presumably HASH_FIND — and found flag) and the return are on elided lines.
 */
508 fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
519 * Lookup a relation in the hash table, creating an entry if not present.
521 * On successful lookup, the relation's useCount is incremented, possibly
522 * causing it to move up in the priority list.
525 create_fsm_rel(RelFileNode *rel)
/*
 * NOTE(review): local declarations, hash_search arguments (presumably
 * HASH_ENTER with a found flag), and several braces are on elided lines.
 */
531 fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
536 elog(ERROR, "FreeSpaceMap hashtable out of memory");
540 /* New hashtable entry, initialize it (hash_search set the key) */
541 fsmrel->useCount = 1;
542 fsmrel->threshold = BLCKSZ / 2; /* starting point for new entry */
543 fsmrel->nextPage = 0;
544 fsmrel->numPages = 0;
545 fsmrel->numChunks = 0;
546 fsmrel->relChunks = NULL;
547 fsmrel->lastChunk = NULL;
548 /* Discard lowest-priority existing rel, if we are over limit */
549 if (FreeSpaceMap->numRels >= MaxFSMRelations)
550 delete_fsm_rel(FreeSpaceMap->relListTail);
553 * Add new entry in front of any others with useCount 1 (since it
554 * is more recently used than them).
/* Walk backward from the tail past all useCount<=1 entries. */
556 oldrel = FreeSpaceMap->relListTail;
557 while (oldrel && oldrel->useCount <= 1)
558 oldrel = oldrel->priorRel;
559 link_fsm_rel_after(fsmrel, oldrel);
560 FreeSpaceMap->numRels++;
566 /* Existing entry, advance its useCount */
567 if (++(fsmrel->useCount) >= INT_MAX / 2)
569 /* When useCounts threaten to overflow, reduce 'em all 2X */
570 for (oldrel = FreeSpaceMap->relList;
572 oldrel = oldrel->nextRel)
573 oldrel->useCount >>= 1;
575 /* If warranted, move it up the priority list */
576 oldrel = fsmrel->priorRel;
577 myCount = fsmrel->useCount;
578 if (oldrel && oldrel->useCount <= myCount)
/* Unlink, then walk backward to the first entry with a higher count. */
580 unlink_fsm_rel(fsmrel);
581 while (oldrel && oldrel->useCount <= myCount)
582 oldrel = oldrel->priorRel;
583 link_fsm_rel_after(fsmrel, oldrel);
591 * Remove an existing FSMRelation entry.
/* Frees the rel's chunks, delinks it, and removes its hashtable entry. */
594 delete_fsm_rel(FSMRelation *fsmrel)
598 free_chunk_chain(fsmrel->relChunks);
599 unlink_fsm_rel(fsmrel);
600 FreeSpaceMap->numRels--;
/* NOTE(review): the HASH_REMOVE action argument is on an elided line. */
601 result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
602 (void *) &(fsmrel->key),
606 elog(ERROR, "FreeSpaceMap hashtable corrupted");
610 * Link a FSMRelation into the priority list just after the given existing
611 * entry (or at the head of the list, if oldrel is NULL).
614 link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel)
/* Insert at the head of the doubly-linked list when oldrel is NULL. */
619 fsmrel->priorRel = NULL;
620 fsmrel->nextRel = FreeSpaceMap->relList;
621 FreeSpaceMap->relList = fsmrel;
622 if (fsmrel->nextRel != NULL)
623 fsmrel->nextRel->priorRel = fsmrel;
/* If there is no successor, the new entry is also the tail. */
625 FreeSpaceMap->relListTail = fsmrel;
629 /* insert after oldrel */
630 fsmrel->priorRel = oldrel;
631 fsmrel->nextRel = oldrel->nextRel;
632 oldrel->nextRel = fsmrel;
633 if (fsmrel->nextRel != NULL)
634 fsmrel->nextRel->priorRel = fsmrel;
636 FreeSpaceMap->relListTail = fsmrel;
641 * Delink a FSMRelation from the priority list.
/* Standard doubly-linked-list removal, fixing head/tail when at an end. */
644 unlink_fsm_rel(FSMRelation *fsmrel)
646 if (fsmrel->priorRel != NULL)
647 fsmrel->priorRel->nextRel = fsmrel->nextRel;
649 FreeSpaceMap->relList = fsmrel->nextRel;
650 if (fsmrel->nextRel != NULL)
651 fsmrel->nextRel->priorRel = fsmrel->priorRel;
653 FreeSpaceMap->relListTail = fsmrel->priorRel;
657 * Return all the FSMChunks in the chain starting at fchunk to the freelist.
658 * (Caller must handle unlinking them from wherever they were.)
661 free_chunk_chain(FSMChunk *fchunk)
/*
 * NOTE(review): the NULL check for fchunk, local declarations, and the
 * nchunks counter updates are on elided lines in this excerpt.
 */
670 while (lchunk->next != NULL)
673 lchunk = lchunk->next;
/* Splice the whole chain onto the head of the global freelist. */
675 lchunk->next = FreeSpaceMap->freeChunks;
676 FreeSpaceMap->freeChunks = fchunk;
677 FreeSpaceMap->numFreeChunks += nchunks;
681 * Look to see if a page with at least the specified amount of space is
682 * available in the given FSMRelation. If so, return its page number,
683 * and advance the nextPage counter so that the next inquiry will return
684 * a different page if possible. Return InvalidBlockNumber if no success.
687 find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
689 int pagesToCheck, /* outer loop counter */
690 pageIndex, /* current page index relative to relation */
691 chunkRelIndex; /* current page index relative to curChunk */
/* Circular scan over all stored pages, starting at nextPage. */
694 pageIndex = fsmrel->nextPage;
695 /* Last operation may have left nextPage pointing past end */
696 if (pageIndex >= fsmrel->numPages)
698 curChunk = fsmrel->relChunks;
699 chunkRelIndex = pageIndex;
701 for (pagesToCheck = fsmrel->numPages; pagesToCheck > 0; pagesToCheck--)
704 * Make sure we are in the right chunk. First time through, we
705 * may have to advance through multiple chunks; subsequent loops
706 * should do this at most once.
708 while (chunkRelIndex >= curChunk->numPages)
710 chunkRelIndex -= curChunk->numPages;
711 curChunk = curChunk->next;
713 /* Check the next page */
714 if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
716 fsmrel->nextPage = pageIndex + 1;
717 return curChunk->pages[chunkRelIndex];
719 /* Advance pageIndex and chunkRelIndex, wrapping around if needed */
720 if (++pageIndex >= fsmrel->numPages)
/* Wrap back to the first chunk; resets elided here. */
723 curChunk = fsmrel->relChunks;
730 return InvalidBlockNumber; /* nothing found */
734 * fsm_record_free_space - guts of RecordFreeSpace, which is also used by
735 * RecordAndGetPageWithFreeSpace.
738 fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
743 if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
745 /* Found an existing entry for page; update or delete it */
746 if (spaceAvail >= fsmrel->threshold)
747 chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
749 delete_fsm_page_entry(fsmrel, chunk, chunkRelIndex);
754 * No existing entry; add one if spaceAvail exceeds threshold.
756 * CORNER CASE: if we have to do acquire_fsm_free_space then our own
757 * threshold will increase, possibly meaning that we shouldn't
758 * store the page after all. Loop to redo the test if that
759 * happens. The loop also covers the possibility that
760 * acquire_fsm_free_space must be executed more than once to free
761 * any space (ie, thresholds must be more than doubled).
763 while (spaceAvail >= fsmrel->threshold)
765 if (insert_fsm_page_entry(fsmrel, page, spaceAvail,
766 chunk, chunkRelIndex))
768 /* No space, acquire some and recheck threshold */
769 acquire_fsm_free_space();
770 if (spaceAvail < fsmrel->threshold)
774 * Need to redo the lookup since our own page list may well
775 * have lost entries, so position is not correct anymore.
/* A match here is impossible: a matching entry would have been found above. */
777 if (lookup_fsm_page_entry(fsmrel, page,
778 &chunk, &chunkRelIndex))
779 elog(ERROR, "fsm_record_free_space: unexpected match");
785 * Look for an entry for a specific page (block number) in a FSMRelation.
786 * Returns TRUE if a matching entry exists, else FALSE.
788 * The output arguments *outChunk, *outChunkRelIndex are set to indicate where
789 * the entry exists (if TRUE result) or could be inserted (if FALSE result).
790 * *chunk is set to NULL if there is no place to insert, ie, the entry would
791 * need to be added to a new chunk.
794 lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
795 FSMChunk **outChunk, int *outChunkRelIndex)
/* Relies on the invariant that pages are kept sorted across the chunk list. */
800 for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
802 int numPages = chunk->numPages;
804 /* Can skip the chunk quickly if page must be after last in chunk */
805 if (numPages > 0 && page <= chunk->pages[numPages - 1])
807 for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
809 if (page <= chunk->pages[chunkRelIndex])
812 *outChunkRelIndex = chunkRelIndex;
813 return (page == chunk->pages[chunkRelIndex]);
816 /* Should not get here, given above test */
821 * If we are about to fall off the end, and there's space
822 * available in the end chunk, return a pointer to it.
824 if (chunk->next == NULL && numPages < CHUNKPAGES)
827 *outChunkRelIndex = numPages;
833 * Adding the page would require a new chunk (or, perhaps, compaction
834 * of available free space --- not my problem here).
/* NOTE(review): *outChunk assignments and returns are on elided lines. */
837 *outChunkRelIndex = 0;
842 * Insert a new page entry into a FSMRelation's list at given position
843 * (chunk == NULL implies at end).
845 * If there is no space available to insert the entry, return FALSE.
848 insert_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail,
849 FSMChunk *chunk, int chunkRelIndex)
851 /* Outer loop handles retry after compacting rel's page list */
854 if (fsmrel->numPages >= fsmrel->numChunks * CHUNKPAGES)
856 /* No free space within chunk list, so need another chunk */
857 if (!append_fsm_chunk(fsmrel))
858 return false; /* can't do it */
861 /* Original search found that new page belongs at end */
862 chunk = fsmrel->lastChunk;
868 * Try to insert it the easy way, ie, just move down subsequent
/* NOTE(review): part of the condition and bookkeeping lines are elided. */
872 push_fsm_page_entry(page, spaceAvail, chunk, chunkRelIndex))
875 fsmrel->nextPage++; /* don't return same page twice running */
880 * There is space available, but evidently it's before the place
881 * where the page entry needs to go. Compact the list and try
882 * again. This will require us to redo the search for the
883 * appropriate place. Furthermore, compact_fsm_page_list deletes
884 * empty end chunks, so we may need to repeat the action of
885 * grabbing a new end chunk.
887 compact_fsm_page_list(fsmrel);
888 if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
889 elog(ERROR, "insert_fsm_page_entry: entry already exists!");
894 * Add one chunk to a FSMRelation's chunk list, if possible.
896 * Returns TRUE if successful, FALSE if no space available. Note that on
897 * success, the new chunk is easily accessible via fsmrel->lastChunk.
900 append_fsm_chunk(FSMRelation *fsmrel)
904 /* Remove a chunk from the freelist */
905 if ((newChunk = FreeSpaceMap->freeChunks) == NULL)
906 return false; /* can't do it */
907 FreeSpaceMap->freeChunks = newChunk->next;
908 FreeSpaceMap->numFreeChunks--;
910 /* Initialize chunk to empty */
911 newChunk->next = NULL;
912 newChunk->numPages = 0;
914 /* Link it into FSMRelation */
915 if (fsmrel->relChunks == NULL)
916 fsmrel->relChunks = newChunk;
918 fsmrel->lastChunk->next = newChunk;
919 fsmrel->lastChunk = newChunk;
/* NOTE(review): numChunks increment and "return true" are on elided lines. */
926 * Auxiliary routine for insert_fsm_page_entry: try to push entries to the
927 * right to insert at chunk/chunkRelIndex. Return TRUE if successful.
928 * Note that the FSMRelation's own fields are not updated.
931 push_fsm_page_entry(BlockNumber page, Size spaceAvail,
932 FSMChunk *chunk, int chunkRelIndex)
936 if (chunk->numPages >= CHUNKPAGES)
938 if (chunk->next == NULL)
939 return false; /* no space */
940 /* try to push chunk's last item to next chunk */
/* Recurses down the chain until a chunk with a free slot is found. */
941 if (!push_fsm_page_entry(chunk->pages[CHUNKPAGES - 1],
942 chunk->bytes[CHUNKPAGES - 1],
945 /* successfully pushed it */
/* Shift following entries right to open the slot at chunkRelIndex. */
948 for (i = chunk->numPages; i > chunkRelIndex; i--)
950 chunk->pages[i] = chunk->pages[i - 1];
951 chunk->bytes[i] = chunk->bytes[i - 1];
954 chunk->pages[chunkRelIndex] = page;
955 chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
960 * Delete one page entry from a FSMRelation's list. Compact free space
961 * in the list, but only if a chunk can be returned to the freelist.
964 delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, int chunkRelIndex)
969 Assert(chunk && chunkRelIndex >= 0 && chunkRelIndex < chunk->numPages);
970 /* Compact out space in this chunk */
971 lim = --chunk->numPages;
972 for (i = chunkRelIndex; i < lim; i++)
974 chunk->pages[i] = chunk->pages[i + 1];
975 chunk->bytes[i] = chunk->bytes[i + 1];
977 /* Compact the whole list if a chunk can be freed */
/* A whole chunk's worth of slack means compaction can release one chunk. */
979 if (fsmrel->numPages <= (fsmrel->numChunks - 1) * CHUNKPAGES)
980 compact_fsm_page_list(fsmrel);
984 * Remove any pages with free space less than the current threshold for the
985 * FSMRelation, compact out free slots in non-last chunks, and return any
986 * completely freed chunks to the freelist.
989 compact_fsm_page_list(FSMRelation *fsmrel)
991 Size threshold = fsmrel->threshold;
/*
 * Two-cursor compaction: src scans every entry; dst writes back the
 * surviving ones. dst never passes src, so in-place copying is safe.
 */
999 srcChunk = dstChunk = fsmrel->relChunks;
1000 srcIndex = dstIndex = 0;
1001 dstPages = 0; /* total pages kept */
1002 dstChunkCnt = 1; /* includes current dstChunk */
1004 while (srcChunk != NULL)
1006 int srcPages = srcChunk->numPages;
1008 while (srcIndex < srcPages)
1010 if ((Size) srcChunk->bytes[srcIndex] >= threshold)
1012 if (dstIndex >= CHUNKPAGES)
1015 * At this point srcChunk must be pointing to a later
1016 * chunk, so it's OK to overwrite dstChunk->numPages.
1018 dstChunk->numPages = dstIndex;
1019 dstChunk = dstChunk->next;
1023 dstChunk->pages[dstIndex] = srcChunk->pages[srcIndex];
1024 dstChunk->bytes[dstIndex] = srcChunk->bytes[srcIndex];
1030 srcChunk = srcChunk->next;
1036 /* No chunks to be kept at all */
1037 fsmrel->nextPage = 0;
1038 fsmrel->numPages = 0;
1039 fsmrel->numChunks = 0;
1040 fsmrel->relChunks = NULL;
1041 fsmrel->lastChunk = NULL;
1042 free_chunk_chain(dstChunk);
1046 /* we deliberately don't change nextPage here */
1047 fsmrel->numPages = dstPages;
1048 fsmrel->numChunks = dstChunkCnt;
1049 dstChunk->numPages = dstIndex;
/* Everything after the last surviving chunk goes back to the freelist. */
1050 free_chunk_chain(dstChunk->next);
1051 dstChunk->next = NULL;
1052 fsmrel->lastChunk = dstChunk;
1057 * Acquire some free space by raising the thresholds of all FSMRelations.
1059 * Note there is no guarantee as to how much space will be freed by a single
1060 * invocation; caller may repeat if necessary.
1063 acquire_fsm_free_space(void)
1065 FSMRelation *fsmrel;
1067 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
/* Doubling matches the policy described in the file header comment. */
1069 fsmrel->threshold *= 2;
1070 /* Limit thresholds to BLCKSZ so they can't get ridiculously large */
1071 if (fsmrel->threshold > BLCKSZ)
1072 fsmrel->threshold = BLCKSZ;
1073 /* Release any pages that don't meet the new threshold */
1074 compact_fsm_page_list(fsmrel);
1079 #ifdef FREESPACE_DEBUG
1081 * Dump contents of freespace map for debugging.
1083 * We assume caller holds the FreeSpaceLock, or is otherwise unconcerned
1084 * about other processes.
/*
 * NOTE(review): the function signature line and several counter-update and
 * brace lines are elided from this excerpt.
 */
1089 FSMRelation *fsmrel;
1090 FSMRelation *prevrel = NULL;
1097 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
1100 fprintf(stderr, "Map %d: rel %u/%u useCount %d threshold %u nextPage %d\nMap= ",
1101 relNum, fsmrel->key.tblNode, fsmrel->key.relNode,
1102 fsmrel->useCount, fsmrel->threshold, fsmrel->nextPage);
1103 nChunks = nPages = 0;
1104 for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
1106 int numPages = chunk->numPages;
1109 for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
1112 fprintf(stderr, " %u:%u",
1113 chunk->pages[chunkRelIndex],
1114 chunk->bytes[chunkRelIndex]);
1117 fprintf(stderr, "\n");
1118 /* Cross-check local counters and list links */
1119 if (nPages != fsmrel->numPages)
1120 fprintf(stderr, "DumpFreeSpace: %d pages in rel, but numPages = %d\n",
1121 nPages, fsmrel->numPages);
1122 if (nChunks != fsmrel->numChunks)
1123 fprintf(stderr, "DumpFreeSpace: %d chunks in rel, but numChunks = %d\n",
1124 nChunks, fsmrel->numChunks);
1125 if (prevrel != fsmrel->priorRel)
1126 fprintf(stderr, "DumpFreeSpace: broken list links\n");
1129 if (prevrel != FreeSpaceMap->relListTail)
1130 fprintf(stderr, "DumpFreeSpace: broken list links\n");
1131 /* Cross-check global counters */
1132 if (relNum != FreeSpaceMap->numRels)
1133 fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
1134 relNum, FreeSpaceMap->numRels);
/* Also verify the global freelist length against numFreeChunks. */
1136 for (chunk = FreeSpaceMap->freeChunks; chunk; chunk = chunk->next)
1138 if (nChunks != FreeSpaceMap->numFreeChunks)
1139 fprintf(stderr, "DumpFreeSpace: %d chunks in list, but numFreeChunks = %d\n",
1140 nChunks, FreeSpaceMap->numFreeChunks);
1143 #endif /* FREESPACE_DEBUG */