1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
2 // All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE file.
14 "github.com/syndtr/goleveldb/leveldb/iterator"
15 "github.com/syndtr/goleveldb/leveldb/opt"
16 "github.com/syndtr/goleveldb/leveldb/util"
29 // Level that should be compacted next and its compaction score.
30 // Score < 1 means compaction is not strictly needed. These fields
31 // are initialized by computeCompaction().
// newVersion creates a version owned by session s.
// NOTE(review): constructor body is elided from this view.
42 func newVersion(s *session) *version {
// incref takes a reference on this version and, transitively, one
// file reference on every table file the version points at. Panics
// if called on a version that has already been released.
46 func (v *version) incref() {
48 panic("already released")
54 for _, tt := range v.levels {
55 for _, t := range tt {
56 v.s.addFileRef(t.fd, 1)
// releaseNB drops one reference from the version without taking the
// session lock ("NB" presumably means non-blocking — confirm against
// the locked release() wrapper). Panics if the ref count would go
// negative; drops one file ref per table, and a file whose ref count
// reaches zero is reclaimed (reclaim path elided from this view).
62 func (v *version) releaseNB() {
67 panic("negative version ref")
70 for _, tt := range v.levels {
71 for _, t := range tt {
72 if v.s.addFileRef(t.fd, -1) == 0 {
// release drops one reference from the version.
// NOTE(review): body elided from this view — presumably acquires the
// session lock and delegates to releaseNB; confirm.
81 func (v *version) release() {
// walkOverlapping visits, in lookup order, every table whose user-key
// range overlaps ikey's user key: first the aux tables (reported to
// the callbacks as level -1), then each on-disk level. f is invoked
// once per matching table and lf (if non-nil) once per finished
// level; either callback returning false stops the walk.
87 func (v *version) walkOverlapping(aux tFiles, ikey internalKey, f func(level int, t *tFile) bool, lf func(level int) bool) {
92 for _, t := range aux {
93 if t.overlaps(v.s.icmp, ukey, ukey) {
100 if lf != nil && !lf(-1) {
105 // Walk tables level-by-level.
106 for level, tables := range v.levels {
107 if len(tables) == 0 {
// Level 0 is scanned linearly because its files may overlap each
// other; deeper levels are sorted and disjoint, so a single
// binary search (searchMax) plus a min-bound check suffices.
112 // Level-0 files may overlap each other. Find all files that
114 for _, t := range tables {
115 if t.overlaps(v.s.icmp, ukey, ukey) {
122 if i := tables.searchMax(v.s.icmp, ikey); i < len(tables) {
124 if v.s.icmp.uCompare(ukey, t.imin.ukey()) >= 0 {
132 if lf != nil && !lf(level) {
// get looks ikey up in the aux tables and then level by level via
// walkOverlapping. noValue selects tops.findKey (key only) over
// tops.find (key and value). The first table seeked at a real level
// (level >= 0) is remembered; if it exhausts its seek credits
// (consumeSeek), it is proposed as the next seek-compaction target
// through a CAS on v.cSeek, and tcomp reports whether that happened.
// Returns ErrClosed if the session is already closed.
138 func (v *version) get(aux tFiles, ikey internalKey, ro *opt.ReadOptions, noValue bool) (value []byte, tcomp bool, err error) {
140 return nil, false, ErrClosed
158 // Since entries never hop across level, finding key/value
159 // in smaller level make later levels irrelevant.
160 v.walkOverlapping(aux, ikey, func(level int, t *tFile) bool {
161 if level >= 0 && !tseek {
163 tset = &tSet{level, t}
174 fikey, ferr = v.s.tops.findKey(t, ikey, ro)
176 fikey, fval, ferr = v.s.tops.find(t, ikey, ro)
// Only a found key whose user key actually equals the query key
// counts; the key-type switch on the parsed entry is elided here.
188 if fukey, fseq, fkt, fkerr := parseInternalKey(fikey); fkerr == nil {
189 if v.s.icmp.uCompare(ukey, fukey) == 0 {
190 // Level <= 0 may overlaps each-other.
205 panic("leveldb: invalid internalKey type")
216 }, func(level int) bool {
224 panic("leveldb: invalid internalKey type")
232 if tseek && tset.table.consumeSeek() <= 0 {
233 tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
// sampleSeek charges a simulated read of ikey against the first
// overlapping table: that table consumes one seek credit, and once
// its credits are spent it is proposed as the next seek-compaction
// target via a CAS on v.cSeek. tcomp reports whether the proposal
// was installed.
239 func (v *version) sampleSeek(ikey internalKey) (tcomp bool) {
242 v.walkOverlapping(nil, ikey, func(level int, t *tFile) bool {
244 tset = &tSet{level, t}
247 if tset.table.consumeSeek() <= 0 {
248 tcomp = atomic.CompareAndSwapPointer(&v.cSeek, nil, unsafe.Pointer(tset))
// getIterators returns the set of table iterators covering slice:
// one iterator per level-0 table (those files may overlap, so each
// must be read independently) and one indexed iterator per non-empty
// deeper level. Strictness follows the StrictReader option.
256 func (v *version) getIterators(slice *util.Range, ro *opt.ReadOptions) (its []iterator.Iterator) {
257 strict := opt.GetStrict(v.s.o.Options, ro, opt.StrictReader)
258 for level, tables := range v.levels {
260 // Merge all level zero files together since they may overlap.
261 for _, t := range tables {
262 its = append(its, v.s.tops.newIterator(t, slice, ro))
264 } else if len(tables) != 0 {
265 its = append(its, iterator.NewIndexedIterator(tables.newIndexIterator(v.s.tops, v.s.icmp, slice, ro), strict))
// newStaging returns an empty versionStaging based on this version.
271 func (v *version) newStaging() *versionStaging {
272 return &versionStaging{base: v}
275 // Spawn a new version based on this version.
// Applies the changes in r on top of v and materializes the result.
// NOTE(review): the step that commits r into the staging (between
// these lines) is elided from this view.
276 func (v *version) spawn(r *sessionRecord) *version {
277 staging := v.newStaging()
279 return staging.finish()
// fillRecord records every table of every level into r.
282 func (v *version) fillRecord(r *sessionRecord) {
283 for level, tables := range v.levels {
284 for _, t := range tables {
285 r.addTableFile(level, t)
// tLen returns the number of tables at level; the out-of-range
// fallthrough (presumably returning 0) is elided from this view.
290 func (v *version) tLen(level int) int {
291 if level < len(v.levels) {
292 return len(v.levels[level])
// offsetOf approximates the cumulative byte offset of ikey within the
// whole version: files entirely before ikey add their full size,
// files entirely after ikey are skipped (and, for sorted levels > 0,
// end the level early), and a file straddling ikey contributes an
// approximate in-file offset from tops.offsetOf.
297 func (v *version) offsetOf(ikey internalKey) (n int64, err error) {
298 for level, tables := range v.levels {
299 for _, t := range tables {
300 if v.s.icmp.Compare(t.imax, ikey) <= 0 {
301 // Entire file is before "ikey", so just add the file size
303 } else if v.s.icmp.Compare(t.imin, ikey) > 0 {
304 // Entire file is after "ikey", so ignore
306 // Files other than level 0 are sorted by meta->min, so
307 // no further files in this level will contain data for
312 // "ikey" falls in the range for this table. Add the
313 // approximate offset of "ikey" within the table.
314 if m, err := v.s.tops.offsetOf(t, ikey); err == nil {
// pickMemdbLevel chooses the deepest level (capped at maxLevel) into
// which a frozen memdb covering user keys [umin, umax] can be placed
// directly: level 0 is forced if level-0 files overlap the range;
// otherwise the level is pushed down while the next level has no
// overlap and the grandparent-level overlap stays within the
// configured GetCompactionGPOverlaps budget.
326 func (v *version) pickMemdbLevel(umin, umax []byte, maxLevel int) (level int) {
328 if len(v.levels) == 0 {
331 if !v.levels[0].overlaps(v.s.icmp, umin, umax, true) {
333 for ; level < maxLevel; level++ {
334 if pLevel := level + 1; pLevel >= len(v.levels) {
336 } else if v.levels[pLevel].overlaps(v.s.icmp, umin, umax, false) {
339 if gpLevel := level + 2; gpLevel < len(v.levels) {
340 overlaps = v.levels[gpLevel].getOverlaps(overlaps, v.s.icmp, umin, umax, false)
341 if overlaps.size() > int64(v.s.o.GetCompactionGPOverlaps(level)) {
// computeCompaction scores every level and remembers the best
// candidate for the next compaction: level 0 is scored by file count
// against the L0 trigger, deeper levels by total byte size against
// the per-level size budget. A score >= 1 means the level needs
// compaction. Per-level file counts, sizes and scores are also
// collected and logged as version statistics.
351 func (v *version) computeCompaction() {
352 // Precomputed best level for next compaction
354 bestScore := float64(-1)
356 statFiles := make([]int, len(v.levels))
357 statSizes := make([]string, len(v.levels))
358 statScore := make([]string, len(v.levels))
359 statTotSize := int64(0)
361 for level, tables := range v.levels {
363 size := tables.size()
365 // We treat level-0 specially by bounding the number of files
366 // instead of number of bytes for two reasons:
368 // (1) With larger write-buffer sizes, it is nice not to do too
369 // many level-0 compaction.
371 // (2) The files in level-0 are merged on every read and
372 // therefore we wish to avoid too many files when the individual
373 // file size is small (perhaps because of a small write-buffer
374 // setting, or very high compression ratios, or lots of
375 // overwrites/deletions).
376 score = float64(len(tables)) / float64(v.s.o.GetCompactionL0Trigger())
378 score = float64(size) / float64(v.s.o.GetCompactionTotalSize(level))
381 if score > bestScore {
386 statFiles[level] = len(tables)
387 statSizes[level] = shortenb(int(size))
388 statScore[level] = fmt.Sprintf("%.2f", score)
395 v.s.logf("version@stat F·%v S·%s%v Sc·%v", statFiles, shortenb(int(statTotSize)), statSizes, statScore)
// needCompaction reports whether this version wants a compaction:
// either a level scored >= 1 by computeCompaction, or a table has
// been proposed for seek compaction (cSeek is set).
398 func (v *version) needCompaction() bool {
399 return v.cScore >= 1 || atomic.LoadPointer(&v.cSeek) != nil
// tablesScratch accumulates, for one level, the table additions and
// deletions (keyed by file number) staged while building a new version.
402 type tablesScratch struct {
403 added map[int64]atRecord
404 deleted map[int64]struct{}
// versionStaging stages sessionRecord changes (one scratch per level)
// on top of a base version; finish() materializes the result.
407 type versionStaging struct {
409 levels []tablesScratch
// getScratch returns a pointer to the scratch for level, growing the
// levels slice as needed (the assignment of newLevels back onto
// p.levels is elided from this view).
412 func (p *versionStaging) getScratch(level int) *tablesScratch {
413 if level >= len(p.levels) {
414 newLevels := make([]tablesScratch, level+1)
415 copy(newLevels, p.levels)
418 return &(p.levels[level])
// commit folds r's table changes into the per-level scratches:
// deletions first, then additions. A deletion is only recorded when
// the base version actually has tables at that level, and it cancels
// any staged addition of the same file number; symmetrically, an
// addition cancels a staged deletion of the same file number.
421 func (p *versionStaging) commit(r *sessionRecord) {
423 for _, r := range r.deletedTables {
424 scratch := p.getScratch(r.level)
425 if r.level < len(p.base.levels) && len(p.base.levels[r.level]) > 0 {
426 if scratch.deleted == nil {
427 scratch.deleted = make(map[int64]struct{})
429 scratch.deleted[r.num] = struct{}{}
431 if scratch.added != nil {
432 delete(scratch.added, r.num)
437 for _, r := range r.addedTables {
438 scratch := p.getScratch(r.level)
439 if scratch.added == nil {
440 scratch.added = make(map[int64]atRecord)
442 scratch.added[r.num] = r
443 if scratch.deleted != nil {
444 delete(scratch.deleted, r.num)
// finish builds the new version from the base plus the staged
// scratches: per level it keeps base tables that are neither deleted
// nor re-added, appends the staged additions, sorts the result by key
// (the level condition guarding the sort is elided from this view),
// then trims trailing empty levels and computes the compaction score.
// Levels with no scratch reuse the base's table slice unchanged.
// NOTE(review): "baseTabels" is a typo for "baseTables" — worth
// renaming in a code change (a doc-only edit cannot).
449 func (p *versionStaging) finish() *version {
450 // Build new version.
451 nv := newVersion(p.base.s)
452 numLevel := len(p.levels)
453 if len(p.base.levels) > numLevel {
454 numLevel = len(p.base.levels)
456 nv.levels = make([]tFiles, numLevel)
457 for level := 0; level < numLevel; level++ {
458 var baseTabels tFiles
459 if level < len(p.base.levels) {
460 baseTabels = p.base.levels[level]
463 if level < len(p.levels) {
464 scratch := p.levels[level]
467 // Prealloc list if possible.
468 if n := len(baseTabels) + len(scratch.added) - len(scratch.deleted); n > 0 {
469 nt = make(tFiles, 0, n)
473 for _, t := range baseTabels {
474 if _, ok := scratch.deleted[t.fd.Num]; ok {
477 if _, ok := scratch.added[t.fd.Num]; ok {
484 for _, r := range scratch.added {
485 nt = append(nt, tableFileFromRecord(r))
493 nt.sortByKey(p.base.s.icmp)
496 nv.levels[level] = nt
499 nv.levels[level] = baseTabels
505 for ; n > 0 && nv.levels[n-1] == nil; n-- {
507 nv.levels = nv.levels[:n]
509 // Compute compaction score for new version.
510 nv.computeCompaction()
// versionReleaser adapts releasing a version to a Releaser-style
// interface (fields elided from this view).
515 type versionReleaser struct {
520 func (vr *versionReleaser) Release() {