mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-05-04 13:50:39 +00:00
Update to last common bleve (#3986)
This commit is contained in:
parent
1b7cd3d0b0
commit
917b9641ec
184 changed files with 39576 additions and 121 deletions
453
vendor/github.com/couchbase/vellum/builder.go
generated
vendored
Normal file
453
vendor/github.com/couchbase/vellum/builder.go
generated
vendored
Normal file
|
@ -0,0 +1,453 @@
|
|||
// Copyright (c) 2017 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package vellum
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
)
|
||||
|
||||
var defaultBuilderOpts = &BuilderOpts{
|
||||
Encoder: 1,
|
||||
RegistryTableSize: 10000,
|
||||
RegistryMRUSize: 2,
|
||||
}
|
||||
|
||||
// A Builder is used to build a new FST. When possible data is
|
||||
// streamed out to the underlying Writer as soon as possible.
|
||||
type Builder struct {
|
||||
unfinished *unfinishedNodes
|
||||
registry *registry
|
||||
last []byte
|
||||
len int
|
||||
|
||||
lastAddr int
|
||||
|
||||
encoder encoder
|
||||
opts *BuilderOpts
|
||||
|
||||
builderNodePool builderNodePool
|
||||
transitionPool transitionPool
|
||||
}
|
||||
|
||||
const noneAddr = 1
|
||||
const emptyAddr = 0
|
||||
|
||||
// NewBuilder returns a new Builder which will stream out the
|
||||
// underlying representation to the provided Writer as the set is built.
|
||||
func newBuilder(w io.Writer, opts *BuilderOpts) (*Builder, error) {
|
||||
if opts == nil {
|
||||
opts = defaultBuilderOpts
|
||||
}
|
||||
rv := &Builder{
|
||||
registry: newRegistry(opts.RegistryTableSize, opts.RegistryMRUSize),
|
||||
opts: opts,
|
||||
lastAddr: noneAddr,
|
||||
}
|
||||
rv.unfinished = newUnfinishedNodes(&rv.builderNodePool)
|
||||
|
||||
var err error
|
||||
rv.encoder, err = loadEncoder(opts.Encoder, w)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = rv.encoder.start()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (b *Builder) Reset(w io.Writer) error {
|
||||
b.transitionPool.reset()
|
||||
b.builderNodePool.reset()
|
||||
b.unfinished.Reset(&b.builderNodePool)
|
||||
b.registry.Reset()
|
||||
b.lastAddr = noneAddr
|
||||
b.encoder.reset(w)
|
||||
b.last = nil
|
||||
b.len = 0
|
||||
|
||||
err := b.encoder.start()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Insert the provided value to the set being built.
|
||||
// NOTE: values must be inserted in lexicographical order.
|
||||
func (b *Builder) Insert(key []byte, val uint64) error {
|
||||
// ensure items are added in lexicographic order
|
||||
if bytes.Compare(key, b.last) < 0 {
|
||||
return ErrOutOfOrder
|
||||
}
|
||||
if len(key) == 0 {
|
||||
b.len = 1
|
||||
b.unfinished.setRootOutput(val)
|
||||
return nil
|
||||
}
|
||||
|
||||
prefixLen, out := b.unfinished.findCommonPrefixAndSetOutput(key, val)
|
||||
b.len++
|
||||
err := b.compileFrom(prefixLen)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
b.copyLastKey(key)
|
||||
b.unfinished.addSuffix(key[prefixLen:], out, &b.builderNodePool)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) copyLastKey(key []byte) {
|
||||
if b.last == nil {
|
||||
b.last = make([]byte, 0, 64)
|
||||
} else {
|
||||
b.last = b.last[:0]
|
||||
}
|
||||
b.last = append(b.last, key...)
|
||||
}
|
||||
|
||||
// Close MUST be called after inserting all values.
|
||||
func (b *Builder) Close() error {
|
||||
err := b.compileFrom(0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
root := b.unfinished.popRoot()
|
||||
rootAddr, err := b.compile(root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return b.encoder.finish(b.len, rootAddr)
|
||||
}
|
||||
|
||||
func (b *Builder) compileFrom(iState int) error {
|
||||
addr := noneAddr
|
||||
for iState+1 < len(b.unfinished.stack) {
|
||||
var node *builderNode
|
||||
if addr == noneAddr {
|
||||
node = b.unfinished.popEmpty()
|
||||
} else {
|
||||
node = b.unfinished.popFreeze(addr, &b.transitionPool)
|
||||
}
|
||||
var err error
|
||||
addr, err = b.compile(node)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
b.unfinished.topLastFreeze(addr, &b.transitionPool)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *Builder) compile(node *builderNode) (int, error) {
|
||||
if node.final && len(node.trans) == 0 &&
|
||||
node.finalOutput == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
found, addr, entry := b.registry.entry(node)
|
||||
if found {
|
||||
return addr, nil
|
||||
}
|
||||
addr, err := b.encoder.encodeState(node, b.lastAddr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
b.lastAddr = addr
|
||||
entry.addr = addr
|
||||
return addr, nil
|
||||
}
|
||||
|
||||
type unfinishedNodes struct {
|
||||
stack []*builderNodeUnfinished
|
||||
|
||||
// cache allocates a reasonable number of builderNodeUnfinished
|
||||
// objects up front and tries to keep reusing them
|
||||
// because the main data structure is a stack, we assume the
|
||||
// same access pattern, and don't track items separately
|
||||
// this means calls get() and pushXYZ() must be paired,
|
||||
// as well as calls put() and popXYZ()
|
||||
cache []builderNodeUnfinished
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) Reset(p *builderNodePool) {
|
||||
u.stack = u.stack[:0]
|
||||
for i := 0; i < len(u.cache); i++ {
|
||||
u.cache[i] = builderNodeUnfinished{}
|
||||
}
|
||||
u.pushEmpty(false, p)
|
||||
}
|
||||
|
||||
func newUnfinishedNodes(p *builderNodePool) *unfinishedNodes {
|
||||
rv := &unfinishedNodes{
|
||||
stack: make([]*builderNodeUnfinished, 0, 64),
|
||||
cache: make([]builderNodeUnfinished, 64),
|
||||
}
|
||||
rv.pushEmpty(false, p)
|
||||
return rv
|
||||
}
|
||||
|
||||
// get new builderNodeUnfinished, reusing cache if possible
|
||||
func (u *unfinishedNodes) get() *builderNodeUnfinished {
|
||||
if len(u.stack) < len(u.cache) {
|
||||
return &u.cache[len(u.stack)]
|
||||
}
|
||||
// full now allocate a new one
|
||||
return &builderNodeUnfinished{}
|
||||
}
|
||||
|
||||
// return builderNodeUnfinished, clearing it for reuse
|
||||
func (u *unfinishedNodes) put() {
|
||||
if len(u.stack) >= len(u.cache) {
|
||||
return
|
||||
// do nothing, not part of cache
|
||||
}
|
||||
u.cache[len(u.stack)] = builderNodeUnfinished{}
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) findCommonPrefixAndSetOutput(key []byte,
|
||||
out uint64) (int, uint64) {
|
||||
var i int
|
||||
for i < len(key) {
|
||||
if i >= len(u.stack) {
|
||||
break
|
||||
}
|
||||
var addPrefix uint64
|
||||
if !u.stack[i].hasLastT {
|
||||
break
|
||||
}
|
||||
if u.stack[i].lastIn == key[i] {
|
||||
commonPre := outputPrefix(u.stack[i].lastOut, out)
|
||||
addPrefix = outputSub(u.stack[i].lastOut, commonPre)
|
||||
out = outputSub(out, commonPre)
|
||||
u.stack[i].lastOut = commonPre
|
||||
i++
|
||||
} else {
|
||||
break
|
||||
}
|
||||
|
||||
if addPrefix != 0 {
|
||||
u.stack[i].addOutputPrefix(addPrefix)
|
||||
}
|
||||
}
|
||||
|
||||
return i, out
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) pushEmpty(final bool, p *builderNodePool) {
|
||||
next := u.get()
|
||||
next.node = p.alloc()
|
||||
next.node.final = final
|
||||
u.stack = append(u.stack, next)
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) popRoot() *builderNode {
|
||||
l := len(u.stack)
|
||||
var unfinished *builderNodeUnfinished
|
||||
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
|
||||
rv := unfinished.node
|
||||
u.put()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) popFreeze(addr int, tp *transitionPool) *builderNode {
|
||||
l := len(u.stack)
|
||||
var unfinished *builderNodeUnfinished
|
||||
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
|
||||
unfinished.lastCompiled(addr, tp)
|
||||
rv := unfinished.node
|
||||
u.put()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) popEmpty() *builderNode {
|
||||
l := len(u.stack)
|
||||
var unfinished *builderNodeUnfinished
|
||||
u.stack, unfinished = u.stack[:l-1], u.stack[l-1]
|
||||
rv := unfinished.node
|
||||
u.put()
|
||||
return rv
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) setRootOutput(out uint64) {
|
||||
u.stack[0].node.final = true
|
||||
u.stack[0].node.finalOutput = out
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) topLastFreeze(addr int, tp *transitionPool) {
|
||||
last := len(u.stack) - 1
|
||||
u.stack[last].lastCompiled(addr, tp)
|
||||
}
|
||||
|
||||
func (u *unfinishedNodes) addSuffix(bs []byte, out uint64, p *builderNodePool) {
|
||||
if len(bs) == 0 {
|
||||
return
|
||||
}
|
||||
last := len(u.stack) - 1
|
||||
u.stack[last].hasLastT = true
|
||||
u.stack[last].lastIn = bs[0]
|
||||
u.stack[last].lastOut = out
|
||||
for _, b := range bs[1:] {
|
||||
next := u.get()
|
||||
next.node = p.alloc()
|
||||
next.hasLastT = true
|
||||
next.lastIn = b
|
||||
next.lastOut = 0
|
||||
u.stack = append(u.stack, next)
|
||||
}
|
||||
u.pushEmpty(true, p)
|
||||
}
|
||||
|
||||
type builderNodeUnfinished struct {
|
||||
node *builderNode
|
||||
lastOut uint64
|
||||
lastIn byte
|
||||
hasLastT bool
|
||||
}
|
||||
|
||||
func (b *builderNodeUnfinished) lastCompiled(addr int, tp *transitionPool) {
|
||||
if b.hasLastT {
|
||||
transIn := b.lastIn
|
||||
transOut := b.lastOut
|
||||
b.hasLastT = false
|
||||
b.lastOut = 0
|
||||
trans := tp.alloc()
|
||||
trans.in = transIn
|
||||
trans.out = transOut
|
||||
trans.addr = addr
|
||||
b.node.trans = append(b.node.trans, trans)
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builderNodeUnfinished) addOutputPrefix(prefix uint64) {
|
||||
if b.node.final {
|
||||
b.node.finalOutput = outputCat(prefix, b.node.finalOutput)
|
||||
}
|
||||
for _, t := range b.node.trans {
|
||||
t.out = outputCat(prefix, t.out)
|
||||
}
|
||||
if b.hasLastT {
|
||||
b.lastOut = outputCat(prefix, b.lastOut)
|
||||
}
|
||||
}
|
||||
|
||||
type builderNode struct {
|
||||
finalOutput uint64
|
||||
trans []*transition
|
||||
final bool
|
||||
}
|
||||
|
||||
func (n *builderNode) equiv(o *builderNode) bool {
|
||||
if n.final != o.final {
|
||||
return false
|
||||
}
|
||||
if n.finalOutput != o.finalOutput {
|
||||
return false
|
||||
}
|
||||
if len(n.trans) != len(o.trans) {
|
||||
return false
|
||||
}
|
||||
for i, ntrans := range n.trans {
|
||||
otrans := o.trans[i]
|
||||
if ntrans.in != otrans.in {
|
||||
return false
|
||||
}
|
||||
if ntrans.addr != otrans.addr {
|
||||
return false
|
||||
}
|
||||
if ntrans.out != otrans.out {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type transition struct {
|
||||
out uint64
|
||||
addr int
|
||||
in byte
|
||||
}
|
||||
|
||||
func outputPrefix(l, r uint64) uint64 {
|
||||
if l < r {
|
||||
return l
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
func outputSub(l, r uint64) uint64 {
|
||||
return l - r
|
||||
}
|
||||
|
||||
func outputCat(l, r uint64) uint64 {
|
||||
return l + r
|
||||
}
|
||||
|
||||
// the next builderNode to alloc() will be all[nextOuter][nextInner]
|
||||
type builderNodePool struct {
|
||||
all [][]builderNode
|
||||
nextOuter int
|
||||
nextInner int
|
||||
}
|
||||
|
||||
func (p *builderNodePool) reset() {
|
||||
p.nextOuter = 0
|
||||
p.nextInner = 0
|
||||
}
|
||||
|
||||
func (p *builderNodePool) alloc() *builderNode {
|
||||
if p.nextOuter >= len(p.all) {
|
||||
p.all = append(p.all, make([]builderNode, 256))
|
||||
}
|
||||
rv := &p.all[p.nextOuter][p.nextInner]
|
||||
p.nextInner += 1
|
||||
if p.nextInner >= len(p.all[p.nextOuter]) {
|
||||
p.nextOuter += 1
|
||||
p.nextInner = 0
|
||||
}
|
||||
rv.finalOutput = 0
|
||||
rv.trans = rv.trans[:0]
|
||||
rv.final = false
|
||||
return rv
|
||||
}
|
||||
|
||||
// the next transition to alloc() will be all[nextOuter][nextInner]
|
||||
type transitionPool struct {
|
||||
all [][]transition
|
||||
nextOuter int
|
||||
nextInner int
|
||||
}
|
||||
|
||||
func (p *transitionPool) reset() {
|
||||
p.nextOuter = 0
|
||||
p.nextInner = 0
|
||||
}
|
||||
|
||||
func (p *transitionPool) alloc() *transition {
|
||||
if p.nextOuter >= len(p.all) {
|
||||
p.all = append(p.all, make([]transition, 256))
|
||||
}
|
||||
rv := &p.all[p.nextOuter][p.nextInner]
|
||||
p.nextInner += 1
|
||||
if p.nextInner >= len(p.all[p.nextOuter]) {
|
||||
p.nextOuter += 1
|
||||
p.nextInner = 0
|
||||
}
|
||||
*rv = transition{}
|
||||
return rv
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue