1 /*-------------------------------------------------------------------------
4 * POSTGRES free space map for quickly finding free space in relations
7 * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $Header: /cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v 1.7 2001/10/05 17:28:12 tgl Exp $
16 * The only really interesting aspect of this code is the heuristics for
17 * deciding how much information we can afford to keep about each relation,
18 * given that we have a limited amount of workspace in shared memory.
19 * These currently work as follows:
21 * The number of distinct relations tracked is limited by a configuration
22 * variable (MaxFSMRelations). When this would be exceeded, we discard the
23 * least frequently used relation. A previously-unknown relation is always
24 * entered into the map with useCount 1 on first reference, even if this
25 * causes an existing entry with higher useCount to be discarded. This may
26 * cause a little bit of thrashing among the bottom entries in the list,
27 * but if we didn't do it then there'd be no way for a relation not in the
28 * map to get in once the map is full. Note we allow a relation to be in the
29 * map even if no pages are currently stored for it: this allows us to track
30 * its useCount & threshold, which may eventually go high enough to give it
31 * priority for page storage.
33 * The total number of pages tracked is also set by a configuration variable
34 * (MaxFSMPages). We allocate these dynamically among the known relations,
35 * giving preference to the more-frequently-referenced relations and those
36 * with smaller tuples. This is done by a thresholding mechanism: for each
37 * relation we keep track of a current target threshold, and allow only pages
38 * with free space >= threshold to be stored in the map. The threshold starts
39 * at BLCKSZ/2 (a somewhat arbitrary choice) for a newly entered relation.
40 * On each GetFreeSpace reference, the threshold is moved towards the
41 * request size using a slow exponential moving average; this means that
42 * for heavily used relations, the threshold will approach the average
43 * freespace request size (average tuple size). Whenever we run out of
44 * storage space in the map, we double the threshold of all existing relations
45 * (but not to more than BLCKSZ, so as to prevent runaway thresholds).
46 * Infrequently used relations will thus tend to have large thresholds.
48 * XXX this thresholding mechanism is experimental; need to see how well
49 * it works in practice.
51 *-------------------------------------------------------------------------
57 #include "storage/freespace.h"
58 #include "storage/itemid.h"
59 #include "storage/lwlock.h"
60 #include "storage/shmem.h"
64 * Shared free-space-map objects
66 * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs.
67 * This assumes that all processes accessing the map will have the shared
68 * memory segment mapped at the same place in their address space.
70 typedef struct FSMHeader FSMHeader;
71 typedef struct FSMRelation FSMRelation;
72 typedef struct FSMChunk FSMChunk;
74 /* Header for whole map */
/*
 * All fields are shared-memory state; the internal routines below assume
 * the caller holds FreeSpaceLock while reading or modifying them.
 * relList/relListTail form a doubly-linked list kept in decreasing
 * useCount order (most-frequently-used first).
 * NOTE(review): the struct's opening/closing braces are outside this view.
 */
77 HTAB *relHash; /* hashtable of FSMRelation entries */
78 FSMRelation *relList; /* FSMRelations in useCount order */
79 FSMRelation *relListTail; /* tail of FSMRelation list */
80 int numRels; /* number of FSMRelations now in use */
81 FSMChunk *freeChunks; /* linked list of currently-free chunks */
82 int numFreeChunks; /* number of free chunks */
86 * Per-relation struct --- this is an entry in the shared hash table.
87 * The hash key is the RelFileNode value (hence, we look at the physical
88 * relation ID, not the logical ID, which is appropriate).
/*
 * An entry remains in the list even when it has no pages stored
 * (numPages == 0), so its useCount/threshold can still evolve.
 * nextRel/priorRel are the links of FreeSpaceMap->relList.
 */
92 RelFileNode key; /* hash key (must be first) */
93 FSMRelation *nextRel; /* next rel in useCount order */
94 FSMRelation *priorRel; /* prior rel in useCount order */
95 int useCount; /* use count for prioritizing rels */
96 Size threshold; /* minimum amount of free space to keep */
97 int nextPage; /* index (from 0) to start next search at */
98 int numPages; /* total number of pages we have info about */
99 int numChunks; /* number of FSMChunks allocated to rel */
100 FSMChunk *relChunks; /* linked list of page info chunks */
104 * Info about individual pages in a relation is stored in chunks to reduce
105 * allocation overhead. Note that we allow any chunk of a relation's list
106 * to be partly full, not only the last chunk; this speeds deletion of
107 * individual page entries. When the total number of unused slots reaches
108 * CHUNKPAGES, we compact out the unused slots so that we can return a chunk
109 * to the freelist; but there's no point in doing the compaction before that.
/*
 * Invariant: across a relation's whole chunk chain, pages[] entries are
 * kept in ascending block-number order (lookup_fsm_page_entry and the
 * range-delete in MultiRecordFreeSpace depend on this).
 */
112 #define CHUNKPAGES 32 /* each chunk can store this many pages */
116 FSMChunk *next; /* linked-list link */
117 int numPages; /* number of pages described here */
118 BlockNumber pages[CHUNKPAGES]; /* page numbers within relation */
119 ItemLength bytes[CHUNKPAGES]; /* free space available on each page */
123 int MaxFSMRelations; /* these are set by guc.c */
126 static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */
129 static FSMRelation *lookup_fsm_rel(RelFileNode *rel);
130 static FSMRelation *create_fsm_rel(RelFileNode *rel);
131 static void delete_fsm_rel(FSMRelation *fsmrel);
132 static void link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel);
133 static void unlink_fsm_rel(FSMRelation *fsmrel);
134 static void free_chunk_chain(FSMChunk *fchunk);
135 static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);
136 static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page,
138 static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
139 FSMChunk **outChunk, int *outChunkRelIndex);
140 static bool insert_fsm_page_entry(FSMRelation *fsmrel,
141 BlockNumber page, Size spaceAvail,
142 FSMChunk *chunk, int chunkRelIndex);
143 static bool push_fsm_page_entry(BlockNumber page, Size spaceAvail,
144 FSMChunk *chunk, int chunkRelIndex);
145 static void delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk,
147 static void compact_fsm_page_list(FSMRelation *fsmrel);
148 static void acquire_fsm_free_space(void);
157 * InitFreeSpaceMap -- Initialize the freespace module.
159 * This must be called once during shared memory initialization.
160 * It builds the empty free space map table. FreeSpaceLock must also be
161 * initialized at some point, but is not touched here --- we assume there is
162 * no need for locking, since only the calling process can be accessing shared
166 InitFreeSpaceMap(void)
173 /* Create table header */
174 FreeSpaceMap = (FSMHeader *) ShmemAlloc(sizeof(FSMHeader));
175 if (FreeSpaceMap == NULL)
176 elog(FATAL, "Insufficient shared memory for free space map");
177 MemSet(FreeSpaceMap, 0, sizeof(FSMHeader));
179 /* Create hashtable for FSMRelations */
180 info.keysize = sizeof(RelFileNode);
181 info.entrysize = sizeof(FSMRelation);
182 info.hash = tag_hash;
/* MaxFSMRelations/10 is only the initial table size hint; the hash
 * table can grow up to MaxFSMRelations entries. */
184 FreeSpaceMap->relHash = ShmemInitHash("Free Space Map Hash",
185 MaxFSMRelations / 10,
188 (HASH_ELEM | HASH_FUNCTION));
190 if (!FreeSpaceMap->relHash)
191 elog(FATAL, "Insufficient shared memory for free space map");
193 /* Allocate FSMChunks and fill up the free-chunks list */
/* nchunks = ceil(MaxFSMPages / CHUNKPAGES) */
194 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
195 FreeSpaceMap->numFreeChunks = nchunks;
197 chunks = (FSMChunk *) ShmemAlloc(nchunks * sizeof(FSMChunk));
199 elog(FATAL, "Insufficient shared memory for free space map");
/* Thread the chunk array into a singly-linked freelist; built back to
 * front, so freeChunks ends up pointing at the last-linked chunk. */
202 while (nchunks-- > 0)
204 chunks->next = prevchunk;
208 FreeSpaceMap->freeChunks = prevchunk;
212 * Estimate amount of shmem space needed for FSM.
/*
 * Must track the allocations actually made in InitFreeSpaceMap:
 * header + hash table (sized for MaxFSMRelations) + chunk array.
 */
215 FreeSpaceShmemSize(void)
221 size = MAXALIGN(sizeof(FSMHeader));
223 /* hash table, including the FSMRelation objects */
224 size += hash_estimate_size(MaxFSMRelations, sizeof(FSMRelation));
226 /* FSMChunk objects */
/* same ceil(MaxFSMPages / CHUNKPAGES) formula as InitFreeSpaceMap */
227 nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1;
229 size += MAXALIGN(nchunks * sizeof(FSMChunk));
235 * GetPageWithFreeSpace - try to find a page in the given relation with
236 * at least the specified amount of free space.
238 * If successful, return the block number; if not, return InvalidBlockNumber.
240 * The caller must be prepared for the possibility that the returned page
241 * will turn out to have too little space available by the time the caller
242 * gets a lock on it. In that case, the caller should report the actual
243 * amount of free space available on that page (via RecordFreeSpace) and
244 * then try again. If InvalidBlockNumber is returned, extend the relation.
247 GetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded)
250 BlockNumber freepage;
252 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
254 * We always add a rel to the hashtable when it is inquired about.
/* create_fsm_rel also bumps the rel's useCount / priority position */
256 fsmrel = create_fsm_rel(rel);
258 * Adjust the threshold towards the space request. This essentially
259 * implements an exponential moving average with an equivalent period
260 * of about 63 requests. Ignore silly requests, however, to ensure
261 * that the average stays in bounds.
263 * In theory, if the threshold increases here we should immediately
264 * delete any pages that fall below the new threshold. In practice
265 * it seems OK to wait until we have a need to compact space.
267 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
269 int cur_avg = (int) fsmrel->threshold;
/* move threshold 1/32 of the way towards spaceNeeded (EMA step) */
271 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
272 fsmrel->threshold = (Size) cur_avg;
274 freepage = find_free_space(fsmrel, spaceNeeded);
275 LWLockRelease(FreeSpaceLock);
280 * RecordFreeSpace - record the amount of free space available on a page.
282 * The FSM is at liberty to forget about the page instead, if the amount of
283 * free space is too small to be interesting. The only guarantee is that
284 * a subsequent request to get a page with more than spaceAvail space free
285 * will not return this page.
288 RecordFreeSpace(RelFileNode *rel, BlockNumber page, Size spaceAvail)
292 /* Sanity check: ensure spaceAvail will fit into ItemLength */
293 AssertArg(spaceAvail < BLCKSZ);
295 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
297 * We choose not to add rels to the hashtable unless they've been
298 * inquired about with GetPageWithFreeSpace. Also, a Record operation
299 * does not increase the useCount or change threshold, only Get does.
301 fsmrel = lookup_fsm_rel(rel);
/* NOTE(review): a null-check on fsmrel presumably sits on the line not
 * visible here (lookup_fsm_rel can return NULL) — confirm in full file. */
303 fsm_record_free_space(fsmrel, page, spaceAvail);
304 LWLockRelease(FreeSpaceLock);
308 * RecordAndGetPageWithFreeSpace - combo form to save one lock and
309 * hash table lookup cycle.
/*
 * Equivalent in effect to RecordFreeSpace(oldPage) + GetPageWithFreeSpace,
 * but done under a single FreeSpaceLock acquisition, and with the threshold
 * adjusted before the Record step (see note below).
 */
312 RecordAndGetPageWithFreeSpace(RelFileNode *rel,
318 BlockNumber freepage;
320 /* Sanity check: ensure spaceAvail will fit into ItemLength */
321 AssertArg(oldSpaceAvail < BLCKSZ);
323 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
325 * We always add a rel to the hashtable when it is inquired about.
327 fsmrel = create_fsm_rel(rel);
329 * Adjust the threshold towards the space request, same as in
330 * GetPageWithFreeSpace.
332 * Note that we do this before storing data for oldPage, which means
333 * this isn't exactly equivalent to Record followed by Get; but it
334 * seems appropriate to adjust the threshold first.
336 if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
338 int cur_avg = (int) fsmrel->threshold;
/* same 1/32-step exponential moving average as GetPageWithFreeSpace */
340 cur_avg += ((int) spaceNeeded - cur_avg) / 32;
341 fsmrel->threshold = (Size) cur_avg;
344 fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail);
346 freepage = find_free_space(fsmrel, spaceNeeded);
347 LWLockRelease(FreeSpaceLock);
352 * MultiRecordFreeSpace - record available-space info about multiple pages
353 * of a relation in one call.
355 * First, if minPage <= maxPage, the FSM must discard any entries it has for
356 * pages in that page number range (inclusive). This allows obsolete info
357 * to be discarded. Second, if nPages > 0, record the page numbers and free
358 * space amounts in the given arrays. As with RecordFreeSpace, the FSM is at
359 * liberty to discard some of the information. However, it *must* discard
360 * previously stored info in the minPage..maxPage range (for example, this
361 * case is used to remove info about deleted pages during relation truncation).
364 MultiRecordFreeSpace(RelFileNode *rel,
374 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
375 fsmrel = lookup_fsm_rel(rel);
379 * Remove entries in specified range
381 if (minPage <= maxPage)
387 /* Use lookup to locate first entry >= minPage */
388 lookup_fsm_page_entry(fsmrel, minPage, &chunk, &chunkRelIndex);
389 /* Set free space to 0 for each page within range */
/* Deletion strategy: zero out bytes[] in place, then let one
 * compact_fsm_page_list pass remove all the zeroed slots at once. */
391 while (chunk && !done)
393 int numPages = chunk->numPages;
395 for (; chunkRelIndex < numPages; chunkRelIndex++)
/* relies on ascending page order: first entry past maxPage ends scan */
397 if (chunk->pages[chunkRelIndex] > maxPage)
402 chunk->bytes[chunkRelIndex] = 0;
407 /* Now compact out the zeroed entries */
408 compact_fsm_page_list(fsmrel);
411 * Add new entries, if appropriate.
413 * XXX we could probably be smarter about this than doing it
414 * completely separately for each one. FIXME later.
416 * One thing we can do is short-circuit the process entirely if
417 * a page (a) has too little free space to be recorded, and (b)
418 * is within the minPage..maxPage range --- then we deleted any
419 * old entry above, and we aren't going to make a new one.
420 * This is particularly useful since in most cases, all the passed
421 * pages will in fact be in the minPage..maxPage range.
423 for (i = 0; i < nPages; i++)
425 BlockNumber page = pages[i];
426 Size avail = spaceAvail[i];
/* skip only when both (a) below-threshold and (b) inside the already-
 * purged range; otherwise fsm_record_free_space must see the page */
428 if (avail >= fsmrel->threshold ||
429 page < minPage || page > maxPage)
430 fsm_record_free_space(fsmrel, page, avail);
433 LWLockRelease(FreeSpaceLock);
437 * FreeSpaceMapForgetRel - forget all about a relation.
439 * This is called when a relation is deleted. Although we could just let
440 * the rel age out of the map, it's better to reclaim and reuse the space
444 FreeSpaceMapForgetRel(RelFileNode *rel)
448 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
449 fsmrel = lookup_fsm_rel(rel);
/* NOTE(review): intervening line (not visible) presumably guards against
 * a NULL lookup result before deleting — confirm in full file. */
451 delete_fsm_rel(fsmrel);
452 LWLockRelease(FreeSpaceLock);
456 * FreeSpaceMapForgetDatabase - forget all relations of a database.
458 * This is called during DROP DATABASE. As above, might as well reclaim
459 * map space sooner instead of later.
461 * XXX when we implement tablespaces, target Oid will need to be tablespace
462 * ID not database ID.
465 FreeSpaceMapForgetDatabase(Oid dbid)
470 LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
471 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = nextrel)
/* fetch the successor before a possible delete unlinks fsmrel */
473 nextrel = fsmrel->nextRel; /* in case we delete it */
474 if (fsmrel->key.tblNode == dbid)
475 delete_fsm_rel(fsmrel);
477 LWLockRelease(FreeSpaceLock);
482 * Internal routines. These all assume the caller holds the FreeSpaceLock.
486 * Lookup a relation in the hash table. If not present, return NULL.
487 * The relation's useCount is not changed.
490 lookup_fsm_rel(RelFileNode *rel)
/* read-only probe of the shared hash table; no side effects on the map */
494 fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
505 * Lookup a relation in the hash table, creating an entry if not present.
507 * On successful lookup, the relation's useCount is incremented, possibly
508 * causing it to move up in the priority list.
511 create_fsm_rel(RelFileNode *rel)
517 fsmrel = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
522 elog(ERROR, "FreeSpaceMap hashtable out of memory");
526 /* New hashtable entry, initialize it (hash_search set the key) */
/* new rels always start at useCount 1, threshold BLCKSZ/2 (see file
 * header comments for the rationale) */
527 fsmrel->useCount = 1;
528 fsmrel->threshold = BLCKSZ/2; /* starting point for new entry */
529 fsmrel->nextPage = 0;
530 fsmrel->numPages = 0;
531 fsmrel->numChunks = 0;
532 fsmrel->relChunks = NULL;
533 /* Discard lowest-priority existing rel, if we are over limit */
/* LFU eviction: the list tail is the least-frequently-used entry */
534 if (FreeSpaceMap->numRels >= MaxFSMRelations)
535 delete_fsm_rel(FreeSpaceMap->relListTail);
537 * Add new entry in front of any others with useCount 1 (since it
538 * is more recently used than them).
/* walk backwards from the tail to find the last rel with useCount > 1 */
540 oldrel = FreeSpaceMap->relListTail;
541 while (oldrel && oldrel->useCount <= 1)
542 oldrel = oldrel->priorRel;
543 link_fsm_rel_after(fsmrel, oldrel);
544 FreeSpaceMap->numRels++;
550 /* Existing entry, advance its useCount */
551 if (++(fsmrel->useCount) >= INT_MAX/2)
553 /* When useCounts threaten to overflow, reduce 'em all 2X */
554 for (oldrel = FreeSpaceMap->relList;
556 oldrel = oldrel->nextRel)
558 oldrel->useCount >>= 1;
561 /* If warranted, move it up the priority list */
562 oldrel = fsmrel->priorRel;
563 myCount = fsmrel->useCount;
/* only relink if the immediate predecessor no longer outranks us */
564 if (oldrel && oldrel->useCount <= myCount)
566 unlink_fsm_rel(fsmrel);
567 while (oldrel && oldrel->useCount <= myCount)
568 oldrel = oldrel->priorRel;
569 link_fsm_rel_after(fsmrel, oldrel);
577 * Remove an existing FSMRelation entry.
/*
 * Returns the rel's chunks to the freelist, unlinks it from the priority
 * list, and removes the hash entry. Caller must hold FreeSpaceLock.
 */
580 delete_fsm_rel(FSMRelation *fsmrel)
584 free_chunk_chain(fsmrel->relChunks);
585 unlink_fsm_rel(fsmrel);
586 FreeSpaceMap->numRels--;
587 result = (FSMRelation *) hash_search(FreeSpaceMap->relHash,
588 (void *) &(fsmrel->key),
/* a failed removal means the hash and list disagree — internal error */
592 elog(ERROR, "FreeSpaceMap hashtable corrupted");
596 * Link a FSMRelation into the priority list just after the given existing
597 * entry (or at the head of the list, if oldrel is NULL).
/*
 * Updates relList/relListTail as needed; both the head-insert and the
 * mid-insert paths must also fix relListTail when the new entry becomes
 * the last element.
 */
600 link_fsm_rel_after(FSMRelation *fsmrel, FSMRelation *oldrel)
/* insert at head of list */
605 fsmrel->priorRel = NULL;
606 fsmrel->nextRel = FreeSpaceMap->relList;
607 FreeSpaceMap->relList = fsmrel;
608 if (fsmrel->nextRel != NULL)
609 fsmrel->nextRel->priorRel = fsmrel;
611 FreeSpaceMap->relListTail = fsmrel;
615 /* insert after oldrel */
616 fsmrel->priorRel = oldrel;
617 fsmrel->nextRel = oldrel->nextRel;
618 oldrel->nextRel = fsmrel;
619 if (fsmrel->nextRel != NULL)
620 fsmrel->nextRel->priorRel = fsmrel;
622 FreeSpaceMap->relListTail = fsmrel;
627 * Delink a FSMRelation from the priority list.
/* Standard doubly-linked-list removal; fixes head/tail pointers when the
 * entry was first/last. The entry's own links are left stale. */
630 unlink_fsm_rel(FSMRelation *fsmrel)
632 if (fsmrel->priorRel != NULL)
633 fsmrel->priorRel->nextRel = fsmrel->nextRel;
635 FreeSpaceMap->relList = fsmrel->nextRel;
636 if (fsmrel->nextRel != NULL)
637 fsmrel->nextRel->priorRel = fsmrel->priorRel;
639 FreeSpaceMap->relListTail = fsmrel->priorRel;
643 * Return all the FSMChunks in the chain starting at fchunk to the freelist.
644 * (Caller must handle unlinking them from wherever they were.)
647 free_chunk_chain(FSMChunk *fchunk)
/* walk to the chain's last element, counting chunks along the way */
656 while (lchunk->next != NULL)
659 lchunk = lchunk->next;
/* splice the whole chain onto the front of the shared freelist */
661 lchunk->next = FreeSpaceMap->freeChunks;
662 FreeSpaceMap->freeChunks = fchunk;
663 FreeSpaceMap->numFreeChunks += nchunks;
667 * Look to see if a page with at least the specified amount of space is
668 * available in the given FSMRelation. If so, return its page number,
669 * and advance the nextPage counter so that the next inquiry will return
670 * a different page if possible. Return InvalidBlockNumber if no success.
/*
 * Circular scan: starts at fsmrel->nextPage and examines each stored
 * entry exactly once (pagesToCheck counts down from numPages), wrapping
 * back to index 0 when the end of the page list is reached.
 */
673 find_free_space(FSMRelation *fsmrel, Size spaceNeeded)
675 int pagesToCheck, /* outer loop counter */
676 pageIndex, /* current page index relative to relation */
677 chunkRelIndex; /* current page index relative to curChunk */
680 pageIndex = fsmrel->nextPage;
681 /* Last operation may have left nextPage pointing past end */
682 if (pageIndex >= fsmrel->numPages)
684 curChunk = fsmrel->relChunks;
685 chunkRelIndex = pageIndex;
687 for (pagesToCheck = fsmrel->numPages; pagesToCheck > 0; pagesToCheck--)
690 * Make sure we are in the right chunk. First time through, we
691 * may have to advance through multiple chunks; subsequent loops
692 * should do this at most once.
694 while (chunkRelIndex >= curChunk->numPages)
696 chunkRelIndex -= curChunk->numPages;
697 curChunk = curChunk->next;
699 /* Check the next page */
700 if ((Size) curChunk->bytes[chunkRelIndex] >= spaceNeeded)
/* remember where to resume so successive calls rotate through pages */
702 fsmrel->nextPage = pageIndex+1;
703 return curChunk->pages[chunkRelIndex];
705 /* Advance pageIndex and chunkRelIndex, wrapping around if needed */
706 if (++pageIndex >= fsmrel->numPages)
709 curChunk = fsmrel->relChunks;
716 return InvalidBlockNumber; /* nothing found */
720 * fsm_record_free_space - guts of RecordFreeSpace, which is also used by
721 * RecordAndGetPageWithFreeSpace.
/*
 * Caller must hold FreeSpaceLock. Entries below the rel's current
 * threshold are not stored (and an existing entry is deleted if its
 * reported space drops below threshold).
 */
724 fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail)
729 if (lookup_fsm_page_entry(fsmrel, page, &chunk, &chunkRelIndex))
731 /* Found an existing entry for page; update or delete it */
732 if (spaceAvail >= fsmrel->threshold)
733 chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
735 delete_fsm_page_entry(fsmrel, chunk, chunkRelIndex);
740 * No existing entry; add one if spaceAvail exceeds threshold.
742 * CORNER CASE: if we have to do acquire_fsm_free_space then
743 * our own threshold will increase, possibly meaning that we
744 * shouldn't store the page after all. Loop to redo the test
745 * if that happens. The loop also covers the possibility that
746 * acquire_fsm_free_space must be executed more than once to
747 * free any space (ie, thresholds must be more than doubled).
749 while (spaceAvail >= fsmrel->threshold)
751 if (insert_fsm_page_entry(fsmrel, page, spaceAvail,
752 chunk, chunkRelIndex))
754 /* No space, acquire some and recheck threshold */
755 acquire_fsm_free_space();
756 if (spaceAvail < fsmrel->threshold)
759 * Need to redo the lookup since our own page list may well
760 * have lost entries, so position is not correct anymore.
762 if (lookup_fsm_page_entry(fsmrel, page,
/* the entry cannot have appeared while we held the lock */
763 &chunk, &chunkRelIndex))
764 elog(ERROR, "fsm_record_free_space: unexpected match");
770 * Look for an entry for a specific page (block number) in a FSMRelation.
771 * Returns TRUE if a matching entry exists, else FALSE.
773 * The output arguments *outChunk, *outChunkRelIndex are set to indicate where
774 * the entry exists (if TRUE result) or could be inserted (if FALSE result).
775 * *chunk is set to NULL if there is no place to insert, ie, the entry would
776 * need to be added to a new chunk.
/* Linear scan over the chunk chain; correctness depends on pages[] being
 * in ascending order across the whole chain. */
779 lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page,
780 FSMChunk **outChunk, int *outChunkRelIndex)
785 for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
787 int numPages = chunk->numPages;
789 /* Can skip the chunk quickly if page must be after last in chunk */
790 if (numPages > 0 && page <= chunk->pages[numPages-1])
792 for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
/* first entry >= page is either the match or the insertion point */
794 if (page <= chunk->pages[chunkRelIndex])
797 *outChunkRelIndex = chunkRelIndex;
798 return (page == chunk->pages[chunkRelIndex]);
801 /* Should not get here, given above test */
805 * If we are about to fall off the end, and there's space available
806 * in the end chunk, return a pointer to it.
808 if (chunk->next == NULL && numPages < CHUNKPAGES)
811 *outChunkRelIndex = numPages;
816 * Adding the page would require a new chunk (or, perhaps, compaction
817 * of available free space --- not my problem here).
820 *outChunkRelIndex = 0;
825 * Insert a new page entry into a FSMRelation's list at given position
826 * (chunk == NULL implies at end).
828 * If there is no space available to insert the entry, return FALSE.
831 insert_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail,
832 FSMChunk *chunk, int chunkRelIndex)
835 int newChunkRelIndex;
/* all existing chunk slots are in use; must grab a chunk from freelist */
837 if (fsmrel->numPages >= fsmrel->numChunks * CHUNKPAGES)
839 /* No free space within chunk list, so need another chunk */
840 if ((newChunk = FreeSpaceMap->freeChunks) == NULL)
841 return false; /* can't do it */
842 FreeSpaceMap->freeChunks = newChunk->next;
843 FreeSpaceMap->numFreeChunks--;
844 newChunk->next = NULL;
845 newChunk->numPages = 0;
846 if (fsmrel->relChunks == NULL)
847 fsmrel->relChunks = newChunk;
/* append the fresh chunk at the end of the rel's chain */
850 FSMChunk *priorChunk = fsmrel->relChunks;
852 while (priorChunk->next != NULL)
853 priorChunk = priorChunk->next;
854 priorChunk->next = newChunk;
859 /* Original search found that new page belongs at end */
865 /* Try to insert it the easy way, ie, just move down subsequent data */
867 push_fsm_page_entry(page, spaceAvail, chunk, chunkRelIndex))
870 fsmrel->nextPage++; /* don't return same page twice running */
874 * There is space available, but evidently it's before the place
875 * where the page entry needs to go. Compact the list and try again.
876 * This will require us to redo the search for the appropriate place.
878 compact_fsm_page_list(fsmrel);
879 if (lookup_fsm_page_entry(fsmrel, page, &newChunk, &newChunkRelIndex))
880 elog(ERROR, "insert_fsm_page_entry: entry already exists!");
/* after compaction all free slots are at the tail, so this must work */
882 push_fsm_page_entry(page, spaceAvail, newChunk, newChunkRelIndex))
885 fsmrel->nextPage++; /* don't return same page twice running */
888 /* Shouldn't get here given the initial if-test for space available */
889 elog(ERROR, "insert_fsm_page_entry: failed to insert entry!");
890 return false; /* keep compiler quiet */
894 * Auxiliary routine for insert_fsm_page_entry: try to push entries to the
895 * right to insert at chunk/chunkRelIndex. Return TRUE if successful.
896 * Note that the FSMRelation's own fields are not updated.
/* Recurses into the next chunk when this one is full; recursion depth is
 * bounded by the length of the chunk chain. */
899 push_fsm_page_entry(BlockNumber page, Size spaceAvail,
900 FSMChunk *chunk, int chunkRelIndex)
904 if (chunk->numPages >= CHUNKPAGES)
906 if (chunk->next == NULL)
907 return false; /* no space */
908 /* try to push chunk's last item to next chunk */
909 if (! push_fsm_page_entry(chunk->pages[CHUNKPAGES-1],
910 chunk->bytes[CHUNKPAGES-1],
913 /* successfully pushed it */
/* shift entries right by one to open the slot at chunkRelIndex */
916 for (i = chunk->numPages; i > chunkRelIndex; i--)
918 chunk->pages[i] = chunk->pages[i-1];
919 chunk->bytes[i] = chunk->bytes[i-1];
922 chunk->pages[chunkRelIndex] = page;
923 chunk->bytes[chunkRelIndex] = (ItemLength) spaceAvail;
928 * Delete one page entry from a FSMRelation's list. Compact free space
929 * in the list, but only if a chunk can be returned to the freelist.
932 delete_fsm_page_entry(FSMRelation *fsmrel, FSMChunk *chunk, int chunkRelIndex)
937 Assert(chunk && chunkRelIndex >= 0 && chunkRelIndex < chunk->numPages);
938 /* Compact out space in this chunk */
939 lim = --chunk->numPages;
940 for (i = chunkRelIndex; i < lim; i++)
942 chunk->pages[i] = chunk->pages[i+1];
943 chunk->bytes[i] = chunk->bytes[i+1];
945 /* Compact the whole list if a chunk can be freed */
/* i.e., total unused slots now reach CHUNKPAGES (see FSMChunk comments) */
947 if (fsmrel->numPages <= (fsmrel->numChunks-1) * CHUNKPAGES)
948 compact_fsm_page_list(fsmrel);
952 * Remove any pages with free space less than the current threshold for the
953 * FSMRelation, compact out free slots in non-last chunks, and return any
954 * completely freed chunks to the freelist.
/*
 * In-place two-cursor copy: (srcChunk, srcIndex) reads every stored entry;
 * (dstChunk, dstIndex) writes back only the entries that still meet the
 * threshold. dst never overtakes src, so overwriting is safe.
 */
957 compact_fsm_page_list(FSMRelation *fsmrel)
959 Size threshold = fsmrel->threshold;
967 srcChunk = dstChunk = fsmrel->relChunks;
968 srcIndex = dstIndex = 0;
969 dstPages = 0; /* total pages kept */
970 dstChunkCnt = 1; /* includes current dstChunk */
972 while (srcChunk != NULL)
974 int srcPages = srcChunk->numPages;
976 while (srcIndex < srcPages)
/* keep only entries still meeting the rel's threshold */
978 if ((Size) srcChunk->bytes[srcIndex] >= threshold)
980 if (dstIndex >= CHUNKPAGES)
983 * At this point srcChunk must be pointing to a later
984 * chunk, so it's OK to overwrite dstChunk->numPages.
986 dstChunk->numPages = dstIndex;
987 dstChunk = dstChunk->next;
991 dstChunk->pages[dstIndex] = srcChunk->pages[srcIndex];
992 dstChunk->bytes[dstIndex] = srcChunk->bytes[srcIndex];
998 srcChunk = srcChunk->next;
1004 /* No chunks to be kept at all */
1005 fsmrel->nextPage = 0;
1006 fsmrel->numPages = 0;
1007 fsmrel->numChunks = 0;
1008 fsmrel->relChunks = NULL;
1009 free_chunk_chain(dstChunk);
1013 /* we deliberately don't change nextPage here */
1014 fsmrel->numPages = dstPages;
1015 fsmrel->numChunks = dstChunkCnt;
1016 dstChunk->numPages = dstIndex;
/* everything past the last destination chunk is now surplus */
1017 free_chunk_chain(dstChunk->next);
1018 dstChunk->next = NULL;
1023 * Acquire some free space by raising the thresholds of all FSMRelations.
1025 * Note there is no guarantee as to how much space will be freed by a single
1026 * invocation; caller may repeat if necessary.
1029 acquire_fsm_free_space(void)
1031 FSMRelation *fsmrel;
1033 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
/* doubling penalizes infrequently-used rels most (see file header) */
1035 fsmrel->threshold *= 2;
1036 /* Limit thresholds to BLCKSZ so they can't get ridiculously large */
1037 if (fsmrel->threshold > BLCKSZ)
1038 fsmrel->threshold = BLCKSZ;
1039 /* Release any pages that don't meet the new threshold */
1040 compact_fsm_page_list(fsmrel);
#ifdef FREESPACE_DEBUG
1047 * Dump contents of freespace map for debugging.
1049 * We assume caller holds the FreeSpaceLock, or is otherwise unconcerned
1050 * about other processes.
/*
 * Besides printing each rel's entries, this cross-checks the redundant
 * counters (numPages, numChunks, numRels, numFreeChunks) and the
 * doubly-linked priority-list pointers, reporting mismatches to stderr.
 */
1055 FSMRelation *fsmrel;
1056 FSMRelation *prevrel = NULL;
1063 for (fsmrel = FreeSpaceMap->relList; fsmrel; fsmrel = fsmrel->nextRel)
1066 fprintf(stderr, "Map %d: rel %u/%u useCount %d threshold %u nextPage %d\nMap= ",
1067 relNum, fsmrel->key.tblNode, fsmrel->key.relNode,
1068 fsmrel->useCount, fsmrel->threshold, fsmrel->nextPage);
1069 nChunks = nPages = 0;
1070 for (chunk = fsmrel->relChunks; chunk; chunk = chunk->next)
1072 int numPages = chunk->numPages;
1075 for (chunkRelIndex = 0; chunkRelIndex < numPages; chunkRelIndex++)
1078 fprintf(stderr, " %u:%u",
1079 chunk->pages[chunkRelIndex],
1080 chunk->bytes[chunkRelIndex]);
1083 fprintf(stderr, "\n");
1084 /* Cross-check local counters and list links */
1085 if (nPages != fsmrel->numPages)
1086 fprintf(stderr, "DumpFreeSpace: %d pages in rel, but numPages = %d\n",
1087 nPages, fsmrel->numPages);
1088 if (nChunks != fsmrel->numChunks)
1089 fprintf(stderr, "DumpFreeSpace: %d chunks in rel, but numChunks = %d\n",
1090 nChunks, fsmrel->numChunks);
1091 if (prevrel != fsmrel->priorRel)
1092 fprintf(stderr, "DumpFreeSpace: broken list links\n");
1095 if (prevrel != FreeSpaceMap->relListTail)
1096 fprintf(stderr, "DumpFreeSpace: broken list links\n");
1097 /* Cross-check global counters */
1098 if (relNum != FreeSpaceMap->numRels)
1099 fprintf(stderr, "DumpFreeSpace: %d rels in list, but numRels = %d\n",
1100 relNum, FreeSpaceMap->numRels);
/* recount the shared freelist and compare with numFreeChunks */
1102 for (chunk = FreeSpaceMap->freeChunks; chunk; chunk = chunk->next)
1104 if (nChunks != FreeSpaceMap->numFreeChunks)
1105 fprintf(stderr, "DumpFreeSpace: %d chunks in list, but numFreeChunks = %d\n",
1106 nChunks, FreeSpaceMap->numFreeChunks);
1109 #endif /* FREESPACE_DEBUG */