1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
2 // All rights reserved.
4 // Use of this source code is governed by a BSD-style license that can be
5 // found in the LICENSE file.
14 "github.com/syndtr/goleveldb/leveldb/cache"
15 "github.com/syndtr/goleveldb/leveldb/iterator"
16 "github.com/syndtr/goleveldb/leveldb/opt"
17 "github.com/syndtr/goleveldb/leveldb/storage"
18 "github.com/syndtr/goleveldb/leveldb/table"
19 "github.com/syndtr/goleveldb/leveldb/util"
22 // tFile holds basic information about a table.
27 imin, imax internalKey
30 // Returns true if given key is after largest key of this table.
31 func (t *tFile) after(icmp *iComparer, ukey []byte) bool {
32 return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0
35 // Returns true if given key is before smallest key of this table.
36 func (t *tFile) before(icmp *iComparer, ukey []byte) bool {
37 return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0
40 // Returns true if given key range overlaps with this table key range.
41 func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool {
42 return !t.after(icmp, umin) && !t.before(icmp, umax)
45 // Cosumes one seek and return current seeks left.
46 func (t *tFile) consumeSeek() int32 {
47 return atomic.AddInt32(&t.seekLeft, -1)
51 func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile {
59 // We arrange to automatically compact this file after
60 // a certain number of seeks. Let's assume:
61 // (1) One seek costs 10ms
62 // (2) Writing or reading 1MB costs 10ms (100MB/s)
63 // (3) A compaction of 1MB does 25MB of IO:
64 // 1MB read from this level
65 // 10-12MB read from next level (boundaries may be misaligned)
66 // 10-12MB written to next level
67 // This implies that 25 seeks cost the same as the compaction
68 // of 1MB of data. I.e., one seek costs approximately the
69 // same as the compaction of 40KB of data. We are a little
70 // conservative and allow approximately one seek for every 16KB
71 // of data before triggering a compaction.
72 f.seekLeft = int32(size / 16384)
80 func tableFileFromRecord(r atRecord) *tFile {
81 return newTableFile(storage.FileDesc{storage.TypeTable, r.num}, r.size, r.imin, r.imax)
84 // tFiles hold multiple tFile.
87 func (tf tFiles) Len() int { return len(tf) }
88 func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] }
90 func (tf tFiles) nums() string {
92 for i, f := range tf {
96 x += fmt.Sprint(f.fd.Num)
102 // Returns true if i smallest key is less than j.
103 // This used for sort by key in ascending order.
104 func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool {
106 n := icmp.Compare(a.imin, b.imin)
108 return a.fd.Num < b.fd.Num
113 // Returns true if i file number is greater than j.
114 // This used for sort by file number in descending order.
115 func (tf tFiles) lessByNum(i, j int) bool {
116 return tf[i].fd.Num > tf[j].fd.Num
119 // Sorts tables by key in ascending order.
120 func (tf tFiles) sortByKey(icmp *iComparer) {
121 sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp})
124 // Sorts tables by file number in descending order.
125 func (tf tFiles) sortByNum() {
126 sort.Sort(&tFilesSortByNum{tFiles: tf})
129 // Returns sum of all tables size.
130 func (tf tFiles) size() (sum int64) {
131 for _, t := range tf {
137 // Searches smallest index of tables whose its smallest
138 // key is after or equal with given key.
139 func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
140 return sort.Search(len(tf), func(i int) bool {
141 return icmp.Compare(tf[i].imin, ikey) >= 0
145 // Searches smallest index of tables whose its largest
146 // key is after or equal with given key.
147 func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
148 return sort.Search(len(tf), func(i int) bool {
149 return icmp.Compare(tf[i].imax, ikey) >= 0
153 // Returns true if given key range overlaps with one or more
154 // tables key range. If unsorted is true then binary search will not be used.
155 func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
157 // Check against all files.
158 for _, t := range tf {
159 if t.overlaps(icmp, umin, umax) {
168 // Find the earliest possible internal key for min.
169 i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
172 // Beginning of range is after all files, so no overlap.
175 return !tf[i].before(icmp, umax)
178 // Returns tables whose its key range overlaps with given key range.
179 // Range will be expanded if ukey found hop across tables.
180 // If overlapped is true then the search will be restarted if umax
182 // The dst content will be overwritten.
183 func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
185 for i := 0; i < len(tf); {
187 if t.overlaps(icmp, umin, umax) {
188 if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
193 } else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
195 // Restart search if it is overlapped.
211 // Returns tables key range.
212 func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
213 for i, t := range tf {
215 imin, imax = t.imin, t.imax
218 if icmp.Compare(t.imin, imin) < 0 {
221 if icmp.Compare(t.imax, imax) > 0 {
229 // Creates iterator index from tables.
230 func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
233 if slice.Start != nil {
234 start = tf.searchMax(icmp, internalKey(slice.Start))
236 if slice.Limit != nil {
237 limit = tf.searchMin(icmp, internalKey(slice.Limit))
243 return iterator.NewArrayIndexer(&tFilesArrayIndexer{
252 // Tables iterator index.
253 type tFilesArrayIndexer struct {
261 func (a *tFilesArrayIndexer) Search(key []byte) int {
262 return a.searchMax(a.icmp, internalKey(key))
265 func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
266 if i == 0 || i == a.Len()-1 {
267 return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
269 return a.tops.newIterator(a.tFiles[i], nil, a.ro)
272 // Helper type for sortByKey.
273 type tFilesSortByKey struct {
278 func (x *tFilesSortByKey) Less(i, j int) bool {
279 return x.lessByKey(x.icmp, i, j)
282 // Helper type for sortByNum.
283 type tFilesSortByNum struct {
287 func (x *tFilesSortByNum) Less(i, j int) bool {
288 return x.lessByNum(i, j)
297 bpool *util.BufferPool
300 // Creates an empty table and returns table writer.
301 func (t *tOps) create() (*tWriter, error) {
302 fd := storage.FileDesc{storage.TypeTable, t.s.allocFileNum()}
303 fw, err := t.s.stor.Create(fd)
311 tw: table.NewWriter(fw, t.s.o.Options),
315 // Builds table from src iterator.
316 func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
329 err = w.append(src.Key(), src.Value())
339 n = w.tw.EntriesLen()
344 // Opens table. It returns a cache handle, which should
345 // be released after use.
346 func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
347 ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
349 r, err = t.s.stor.Open(f.fd)
354 var bcache *cache.NamespaceGetter
356 bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
360 tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
368 if ch == nil && err == nil {
374 // Finds key/value pair whose key is greater than or equal to the
376 func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
382 return ch.Value().(*table.Reader).Find(key, true, ro)
385 // Finds key that is greater than or equal to the given key.
386 func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
392 return ch.Value().(*table.Reader).FindKey(key, true, ro)
395 // Returns approximate offset of the given key.
396 func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
402 return ch.Value().(*table.Reader).OffsetOf(key)
405 // Creates an iterator from the given table.
406 func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
409 return iterator.NewEmptyIterator(err)
411 iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
416 // Removes table from persistent storage. It waits until
417 // no one use the the table.
418 func (t *tOps) remove(f *tFile) {
419 t.cache.Delete(0, uint64(f.fd.Num), func() {
420 if err := t.s.stor.Remove(f.fd); err != nil {
421 t.s.logf("table@remove removing @%d %q", f.fd.Num, err)
423 t.s.logf("table@remove removed @%d", f.fd.Num)
426 t.bcache.EvictNS(uint64(f.fd.Num))
431 // Closes the table ops instance. It will close all tables,
432 // regadless still used or not.
433 func (t *tOps) close() {
441 // Creates new initialized table ops instance.
442 func newTableOps(s *session) *tOps {
446 bpool *util.BufferPool
448 if s.o.GetOpenFilesCacheCapacity() > 0 {
449 cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
451 if !s.o.GetDisableBlockCache() {
452 var bcacher cache.Cacher
453 if s.o.GetBlockCacheCapacity() > 0 {
454 bcacher = cache.NewLRU(s.o.GetBlockCacheCapacity())
456 bcache = cache.NewCache(bcacher)
458 if !s.o.GetDisableBufferPool() {
459 bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
463 noSync: s.o.GetNoSync(),
464 cache: cache.NewCache(cacher),
470 // tWriter wraps the table writer. It keep track of file descriptor
471 // and added key range.
472 type tWriter struct {
482 // Append key/value pair to the table.
483 func (w *tWriter) append(key, value []byte) error {
485 w.first = append([]byte{}, key...)
487 w.last = append(w.last[:0], key...)
488 return w.tw.Append(key, value)
491 // Returns true if the table is empty.
492 func (w *tWriter) empty() bool {
493 return w.first == nil
496 // Closes the storage.Writer.
497 func (w *tWriter) close() {
504 // Finalizes the table and returns table file.
505 func (w *tWriter) finish() (f *tFile, err error) {
517 f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
522 func (w *tWriter) drop() {
524 w.t.s.stor.Remove(w.fd)
525 w.t.s.reuseFileNum(w.fd.Num)