1 /*-------------------------------------------------------------------------
7 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
11 * $PostgreSQL: pgsql/src/backend/access/gist/gistsplit.c,v 1.3 2006/10/04 00:29:48 momjian Exp $
13 *-------------------------------------------------------------------------
17 #include "access/gist_private.h"
23 OffsetNumber *entries;
30 * Forms unions of subkeys after page split, but
31 * uses only tuples aren't in groups of equalent tuples
34 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
35 GistSplitUnion *gsvp, int startkey)
37 IndexTuple *cleanedItVec;
41 cleanedItVec = (IndexTuple *) palloc(sizeof(IndexTuple) * gsvp->len);
43 for (i = 0; i < gsvp->len; i++)
45 if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
48 cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
51 gistMakeUnionItVec(giststate, cleanedItVec, cleanedLen, startkey,
52 gsvp->attr, gsvp->isnull);
58 * unions subkeys for after user picksplit over attno-1 column
61 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl, int attno)
65 gsvp.equiv = spl->spl_equiv;
67 gsvp.attr = spl->spl_lattr;
68 gsvp.len = spl->splitVector.spl_nleft;
69 gsvp.entries = spl->splitVector.spl_left;
70 gsvp.isnull = spl->spl_lisnull;
72 gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
74 gsvp.attr = spl->spl_rattr;
75 gsvp.len = spl->splitVector.spl_nright;
76 gsvp.entries = spl->splitVector.spl_right;
77 gsvp.isnull = spl->spl_risnull;
79 gistunionsubkeyvec(giststate, itvec, &gsvp, attno);
83 * find group in vector with equivalent value
86 gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
93 * attno key is always not null (see gistSplitByKey), so we may not check
96 gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
97 for (i = 0; i < spl->splitVector.spl_nleft; i++)
99 float penalty = gistpenalty(giststate, attno, &entry, false,
100 &valvec[spl->splitVector.spl_left[i]], false);
104 spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
109 gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
110 for (i = 0; i < spl->splitVector.spl_nright; i++)
112 float penalty = gistpenalty(giststate, attno, &entry, false,
113 &valvec[spl->splitVector.spl_right[i]], false);
117 spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
126 cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
130 OffsetNumber *curwpos;
134 for (i = 0; i < *len; i++)
136 if (equiv[a[i]] == FALSE)
143 /* corner case: we shouldn't make void array */
146 equiv[a[i]] = FALSE; /* mark item as non-equivalent */
147 i--; /* redo the same */
160 placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
162 GISTENTRY identry[INDEX_MAX_KEYS];
163 bool isnull[INDEX_MAX_KEYS];
166 gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
168 for (; attno < giststate->tupdesc->natts; attno++)
174 gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
175 lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
176 gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
177 rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
179 if (lpenalty != rpenalty)
181 if (lpenalty > rpenalty)
188 v->splitVector.spl_left[v->splitVector.spl_nleft++] = off;
190 v->splitVector.spl_right[v->splitVector.spl_nright++] = off;
193 #define SWAPVAR( s, d, t ) \
201 * adjust left and right unions according to splits by previous
202 * split by firsts columns. This function is called only in case
203 * when pickSplit doesn't support subspplit.
207 supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
209 bool leaveOnLeft = true,
216 gistentryinit(entryL, oldL, r, NULL, 0, FALSE);
217 gistentryinit(entryR, oldR, r, NULL, 0, FALSE);
218 gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
219 gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
221 if (sv->spl_ldatum_exists && sv->spl_rdatum_exists)
226 penalty1 = gistpenalty(giststate, attno, &entryL, false, &entrySL, false) +
227 gistpenalty(giststate, attno, &entryR, false, &entrySR, false);
228 penalty2 = gistpenalty(giststate, attno, &entryL, false, &entrySR, false) +
229 gistpenalty(giststate, attno, &entryR, false, &entrySL, false);
231 if (penalty1 > penalty2)
237 GISTENTRY *entry1 = (sv->spl_ldatum_exists) ? &entryL : &entryR;
242 * there is only one previously defined union, so we just choose swap
243 * or not by lowest penalty
246 penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
247 penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
249 if (penalty1 < penalty2)
250 leaveOnLeft = (sv->spl_ldatum_exists) ? true : false;
252 leaveOnLeft = (sv->spl_rdatum_exists) ? true : false;
255 if (leaveOnLeft == false)
258 * swap left and right
264 SWAPVAR(sv->spl_left, sv->spl_right, off);
265 SWAPVAR(sv->spl_nleft, sv->spl_nright, noff);
266 SWAPVAR(sv->spl_ldatum, sv->spl_rdatum, datum);
267 gistentryinit(entrySL, sv->spl_ldatum, r, NULL, 0, FALSE);
268 gistentryinit(entrySR, sv->spl_rdatum, r, NULL, 0, FALSE);
271 if (sv->spl_ldatum_exists)
272 gistMakeUnionKey(giststate, attno, &entryL, false, &entrySL, false,
273 &sv->spl_ldatum, &tmpBool);
275 if (sv->spl_rdatum_exists)
276 gistMakeUnionKey(giststate, attno, &entryR, false, &entrySR, false,
277 &sv->spl_rdatum, &tmpBool);
279 sv->spl_ldatum_exists = sv->spl_rdatum_exists = false;
283 * Calls user picksplit method for attno columns to split vector to
284 * two vectors. May use attno+n columns data to
286 * Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
287 * so caller may find better split
288 * Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
291 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
292 IndexTuple *itup, int len, GISTSTATE *giststate)
294 GIST_SPLITVEC *sv = &v->splitVector;
297 * now let the user-defined picksplit function set up the split vector; in
298 * entryvec have no null value!!
301 sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
302 sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
303 sv->spl_ldatum = v->spl_lattr[attno];
304 sv->spl_rdatum = v->spl_rattr[attno];
306 FunctionCall2(&giststate->picksplitFn[attno],
307 PointerGetDatum(entryvec),
308 PointerGetDatum(sv));
310 /* compatibility with old code */
311 if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
312 sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
313 if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
314 sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
316 if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
318 elog(LOG, "PickSplit method of %d columns of index '%s' doesn't support secondary split",
319 attno + 1, RelationGetRelationName(r));
321 supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
324 v->spl_lattr[attno] = sv->spl_ldatum;
325 v->spl_rattr[attno] = sv->spl_rdatum;
326 v->spl_lisnull[attno] = false;
327 v->spl_risnull[attno] = false;
330 * if index is multikey, then we must to try get smaller bounding box for
335 if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
337 if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
340 * Left and right key's unions are equial, so we can get better
341 * split by following columns. Note, unions for attno columns are
351 v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
353 LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
356 * if possible, we should distribute equivalent tuples
360 gistunionsubkey(giststate, itup, v, attno + 1);
364 cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
365 cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
367 gistunionsubkey(giststate, itup, v, attno + 1);
371 * In case with one tuple we just choose left-right by
372 * penalty. It's simplify user-defined pickSplit
374 OffsetNumber toMove = InvalidOffsetNumber;
376 for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
377 if (v->spl_equiv[toMove])
379 Assert(toMove < entryvec->n);
381 placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
384 * redo gistunionsubkey(): it will not degradate
385 * performance, because it's very rarely
388 gistunionsubkey(giststate, itup, v, attno + 1);
392 else if (LenEquiv > 1)
405 gistSplitHalf(GIST_SPLITVEC *v, int len)
409 v->spl_nright = v->spl_nleft = 0;
410 v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
411 v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
412 for (i = 1; i <= len; i++)
414 v->spl_right[v->spl_nright++] = i;
416 v->spl_left[v->spl_nleft++] = i;
420 * if it was invalid tuple then we need special processing.
421 * We move all invalid tuples on right page.
423 * if there is no place on left page, gistSplit will be called one more
424 * time for left page.
426 * Normally, we never exec this code, but after crash replay it's possible
427 * to get 'invalid' tuples (probability is low enough)
430 gistSplitByInvalid(GISTSTATE *giststate, GistSplitVector *v, IndexTuple *itup, int len)
433 static OffsetNumber offInvTuples[MaxOffsetNumber];
434 int nOffInvTuples = 0;
436 for (i = 1; i <= len; i++)
437 if (GistTupleIsInvalid(itup[i - 1]))
438 offInvTuples[nOffInvTuples++] = i;
440 if (nOffInvTuples == len)
442 /* corner case, all tuples are invalid */
443 v->spl_rightvalid = v->spl_leftvalid = false;
444 gistSplitHalf(&v->splitVector, len);
450 v->splitVector.spl_right = offInvTuples;
451 v->splitVector.spl_nright = nOffInvTuples;
452 v->spl_rightvalid = false;
454 v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
455 v->splitVector.spl_nleft = 0;
456 for (i = 1; i <= len; i++)
457 if (!GistTupleIsInvalid(itup[i - 1]))
458 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
459 v->spl_leftvalid = true;
462 gsvp.attr = v->spl_lattr;
463 gsvp.len = v->splitVector.spl_nleft;
464 gsvp.entries = v->splitVector.spl_left;
465 gsvp.isnull = v->spl_lisnull;
467 gistunionsubkeyvec(giststate, itup, &gsvp, 0);
472 * trys to split page by attno key, in a case of null
473 * values move its to separate page.
476 gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
477 GistSplitVector *v, GistEntryVector *entryvec, int attno)
480 static OffsetNumber offNullTuples[MaxOffsetNumber];
481 int nOffNullTuples = 0;
483 for (i = 1; i <= len; i++)
488 if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
490 gistSplitByInvalid(giststate, v, itup, len);
494 datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
495 gistdentryinit(giststate, attno, &(entryvec->vector[i]),
499 offNullTuples[nOffNullTuples++] = i;
502 v->spl_leftvalid = v->spl_rightvalid = true;
504 if (nOffNullTuples == len)
507 * Corner case: All keys in attno column are null, we should try to
508 * split by keys in next column. It all keys in all columns are NULL
509 * just split page half by half
511 v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
513 if (attno + 1 == r->rd_att->natts)
514 gistSplitHalf(&v->splitVector, len);
516 gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
518 else if (nOffNullTuples > 0)
523 * We don't want to mix NULLs and not-NULLs keys on one page, so move
524 * nulls to right page
526 v->splitVector.spl_right = offNullTuples;
527 v->splitVector.spl_nright = nOffNullTuples;
528 v->spl_risnull[attno] = TRUE;
530 v->splitVector.spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
531 v->splitVector.spl_nleft = 0;
532 for (i = 1; i <= len; i++)
533 if (j < v->splitVector.spl_nright && offNullTuples[j] == i)
536 v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
539 gistunionsubkey(giststate, itup, v, attno);
544 * all keys are not-null
546 entryvec->n = len + 1;
548 if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
551 * Splitting on attno column is not optimized: there is a tuples
552 * which can be freely left or right page, we will try to split
553 * page by following columns
555 if (v->spl_equiv == NULL)
558 * simple case: left and right keys for attno column are
561 gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
565 /* we should clean up vector from already distributed tuples */
566 IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
567 OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
569 GIST_SPLITVEC backupSplit = v->splitVector;
571 for (i = 0; i < len; i++)
572 if (v->spl_equiv[i + 1])
575 newitup[newlen++] = itup[i];
580 backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
581 memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
582 backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
583 memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
585 gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
587 /* merge result of subsplit */
588 for (i = 0; i < v->splitVector.spl_nleft; i++)
589 backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
590 for (i = 0; i < v->splitVector.spl_nright; i++)
591 backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
593 v->splitVector = backupSplit;
594 /* reunion left and right datums */
595 gistunionsubkey(giststate, itup, v, attno);