// Copyright 2014-2019 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package lzma
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// node is one element of the binary search tree. Nodes refer to each
// other by index, not by pointer; the constant null marks a missing link.
type node struct {
	// x is the key the tree is ordered by.
	x uint32
	// p, l and r are the indices of the parent, the left child and the
	// right child.
	p, l, r uint32
}
|
|
|
|
|
|
|
|
// wordLen is the number of input bytes packed into the x field of a
// node.
const wordLen = 4
|
|
|
|
|
|
|
|
// binTree supports the identification of the next operation based on a
|
|
|
|
// binary tree.
|
|
|
|
//
|
|
|
|
// Nodes will be identified by their index into the ring buffer.
|
|
|
|
type binTree struct {
|
|
|
|
dict *encoderDict
|
|
|
|
// ring buffer of nodes
|
|
|
|
node []node
|
|
|
|
// absolute offset of the entry for the next node. Position 4
|
|
|
|
// byte larger.
|
|
|
|
hoff int64
|
|
|
|
// front position in the node ring buffer
|
|
|
|
front uint32
|
|
|
|
// index of the root node
|
|
|
|
root uint32
|
|
|
|
// current x value
|
|
|
|
x uint32
|
|
|
|
// preallocated array
|
|
|
|
data []byte
|
|
|
|
}
|
|
|
|
|
|
|
|
// null is the index value that stands for "no node". Zero cannot serve
// that purpose because it is a valid index, and shifting every stored
// index by one would complicate each reference.
const null uint32 = ^uint32(0)
|
|
|
|
|
|
|
|
// newBinTree initializes the binTree structure. The capacity defines
|
|
|
|
// the size of the buffer and defines the maximum distance for which
|
|
|
|
// matches will be found.
|
|
|
|
func newBinTree(capacity int) (t *binTree, err error) {
|
|
|
|
if capacity < 1 {
|
|
|
|
return nil, errors.New(
|
|
|
|
"newBinTree: capacity must be larger than zero")
|
|
|
|
}
|
|
|
|
if int64(capacity) >= int64(null) {
|
|
|
|
return nil, errors.New(
|
|
|
|
"newBinTree: capacity must less 2^{32}-1")
|
|
|
|
}
|
|
|
|
t = &binTree{
|
|
|
|
node: make([]node, capacity),
|
|
|
|
hoff: -int64(wordLen),
|
|
|
|
root: null,
|
|
|
|
data: make([]byte, maxMatchLen),
|
|
|
|
}
|
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetDict installs d as the encoder dictionary used by the tree.
func (t *binTree) SetDict(d *encoderDict) {
	t.dict = d
}
|
|
|
|
|
|
|
|
// WriteByte writes a single byte into the binary tree.
|
|
|
|
func (t *binTree) WriteByte(c byte) error {
|
|
|
|
t.x = (t.x << 8) | uint32(c)
|
|
|
|
t.hoff++
|
|
|
|
if t.hoff < 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
v := t.front
|
|
|
|
if int64(v) < t.hoff {
|
|
|
|
// We are overwriting old nodes stored in the tree.
|
|
|
|
t.remove(v)
|
|
|
|
}
|
|
|
|
t.node[v].x = t.x
|
|
|
|
t.add(v)
|
|
|
|
t.front++
|
|
|
|
if int64(t.front) >= int64(len(t.node)) {
|
|
|
|
t.front = 0
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Writes writes a sequence of bytes into the binTree structure.
|
|
|
|
func (t *binTree) Write(p []byte) (n int, err error) {
|
|
|
|
for _, c := range p {
|
|
|
|
t.WriteByte(c)
|
|
|
|
}
|
|
|
|
return len(p), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// add puts the node v into the tree. The node must not be part of the
|
|
|
|
// tree before.
|
|
|
|
func (t *binTree) add(v uint32) {
|
|
|
|
vn := &t.node[v]
|
|
|
|
// Set left and right to null indices.
|
|
|
|
vn.l, vn.r = null, null
|
|
|
|
// If the binary tree is empty make v the root.
|
|
|
|
if t.root == null {
|
|
|
|
t.root = v
|
|
|
|
vn.p = null
|
|
|
|
return
|
|
|
|
}
|
|
|
|
x := vn.x
|
|
|
|
p := t.root
|
|
|
|
// Search for the right leave link and add the new node.
|
|
|
|
for {
|
|
|
|
pn := &t.node[p]
|
|
|
|
if x <= pn.x {
|
|
|
|
if pn.l == null {
|
|
|
|
pn.l = v
|
|
|
|
vn.p = p
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p = pn.l
|
|
|
|
} else {
|
|
|
|
if pn.r == null {
|
|
|
|
pn.r = v
|
|
|
|
vn.p = p
|
|
|
|
return
|
|
|
|
}
|
|
|
|
p = pn.r
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// parent returns the parent node index of v and the pointer to v value
|
|
|
|
// in the parent.
|
|
|
|
func (t *binTree) parent(v uint32) (p uint32, ptr *uint32) {
|
|
|
|
if t.root == v {
|
|
|
|
return null, &t.root
|
|
|
|
}
|
|
|
|
p = t.node[v].p
|
|
|
|
if t.node[p].l == v {
|
|
|
|
ptr = &t.node[p].l
|
|
|
|
} else {
|
|
|
|
ptr = &t.node[p].r
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove node v.
|
|
|
|
func (t *binTree) remove(v uint32) {
|
|
|
|
vn := &t.node[v]
|
|
|
|
p, ptr := t.parent(v)
|
|
|
|
l, r := vn.l, vn.r
|
|
|
|
if l == null {
|
|
|
|
// Move the right child up.
|
|
|
|
*ptr = r
|
|
|
|
if r != null {
|
|
|
|
t.node[r].p = p
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if r == null {
|
|
|
|
// Move the left child up.
|
|
|
|
*ptr = l
|
|
|
|
t.node[l].p = p
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Search the in-order predecessor u.
|
|
|
|
un := &t.node[l]
|
|
|
|
ur := un.r
|
|
|
|
if ur == null {
|
|
|
|
// In order predecessor is l. Move it up.
|
|
|
|
un.r = r
|
|
|
|
t.node[r].p = l
|
|
|
|
un.p = p
|
|
|
|
*ptr = l
|
|
|
|
return
|
|
|
|
}
|
|
|
|
var u uint32
|
|
|
|
for {
|
|
|
|
// Look for the max value in the tree where l is root.
|
|
|
|
u = ur
|
|
|
|
ur = t.node[u].r
|
|
|
|
if ur == null {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// replace u with ul
|
|
|
|
un = &t.node[u]
|
|
|
|
ul := un.l
|
|
|
|
up := un.p
|
|
|
|
t.node[up].r = ul
|
|
|
|
if ul != null {
|
|
|
|
t.node[ul].p = up
|
|
|
|
}
|
|
|
|
|
|
|
|
// replace v by u
|
|
|
|
un.l, un.r = l, r
|
|
|
|
t.node[l].p = u
|
|
|
|
t.node[r].p = u
|
|
|
|
*ptr = u
|
|
|
|
un.p = p
|
|
|
|
}
|
|
|
|
|
|
|
|
// search looks for the node that have the value x or for the nodes that
|
|
|
|
// brace it. The node highest in the tree with the value x will be
|
|
|
|
// returned. All other nodes with the same value live in left subtree of
|
|
|
|
// the returned node.
|
|
|
|
func (t *binTree) search(v uint32, x uint32) (a, b uint32) {
|
|
|
|
a, b = null, null
|
|
|
|
if v == null {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
vn := &t.node[v]
|
|
|
|
if x <= vn.x {
|
|
|
|
if x == vn.x {
|
|
|
|
return v, v
|
|
|
|
}
|
|
|
|
b = v
|
|
|
|
if vn.l == null {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
v = vn.l
|
|
|
|
} else {
|
|
|
|
a = v
|
|
|
|
if vn.r == null {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
v = vn.r
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// max returns the node with maximum value in the subtree with v as
|
|
|
|
// root.
|
|
|
|
func (t *binTree) max(v uint32) uint32 {
|
|
|
|
if v == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
r := t.node[v].r
|
|
|
|
if r == null {
|
|
|
|
return v
|
|
|
|
}
|
|
|
|
v = r
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// min returns the node with the minimum value in the subtree with v as
|
|
|
|
// root.
|
|
|
|
func (t *binTree) min(v uint32) uint32 {
|
|
|
|
if v == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
l := t.node[v].l
|
|
|
|
if l == null {
|
|
|
|
return v
|
|
|
|
}
|
|
|
|
v = l
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// pred returns the in-order predecessor of node v.
|
|
|
|
func (t *binTree) pred(v uint32) uint32 {
|
|
|
|
if v == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
u := t.max(t.node[v].l)
|
|
|
|
if u != null {
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
p := t.node[v].p
|
|
|
|
if p == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
if t.node[p].r == v {
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
v = p
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// succ returns the in-order successor of node v.
|
|
|
|
func (t *binTree) succ(v uint32) uint32 {
|
|
|
|
if v == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
u := t.min(t.node[v].r)
|
|
|
|
if u != null {
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
for {
|
|
|
|
p := t.node[v].p
|
|
|
|
if p == null {
|
|
|
|
return null
|
|
|
|
}
|
|
|
|
if t.node[p].l == v {
|
|
|
|
return p
|
|
|
|
}
|
|
|
|
v = p
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// xval packs up to the first four bytes of a into a 32-bit unsigned
// integer in big-endian order. If a has fewer than four bytes the
// missing low-order bytes are zero.
func xval(a []byte) uint32 {
	n := len(a)
	if n > 4 {
		n = 4
	}
	var x uint32
	for i := 0; i < n; i++ {
		// Byte i lands in bits 31-8i .. 24-8i.
		x |= uint32(a[i]) << uint(24-8*i)
	}
	return x
}
|
|
|
|
|
|
|
|
// dumpX converts value x into a four-letter string for debugging
// output. The bytes of x are taken from the most significant to the
// least significant one. Every byte that is not a graphic ASCII
// character is shown as '.'.
//
// Bytes above unicode.MaxASCII are never copied verbatim: interpreted
// as runes many of them count as graphic, but as single bytes they
// would make the returned string invalid UTF-8.
func dumpX(x uint32) string {
	a := make([]byte, 4)
	for i := range a {
		c := byte(x >> uint((3-i)*8))
		if c <= unicode.MaxASCII && unicode.IsGraphic(rune(c)) {
			a[i] = c
		} else {
			a[i] = '.'
		}
	}
	return string(a)
}
|
|
|
|
|
|
|
|
// dumpNode writes a representation of the node v into the io.Writer.
|
|
|
|
func (t *binTree) dumpNode(w io.Writer, v uint32, indent int) {
|
|
|
|
if v == null {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
vn := &t.node[v]
|
|
|
|
|
|
|
|
t.dumpNode(w, vn.r, indent+2)
|
|
|
|
|
|
|
|
for i := 0; i < indent; i++ {
|
|
|
|
fmt.Fprint(w, " ")
|
|
|
|
}
|
|
|
|
if vn.p == null {
|
|
|
|
fmt.Fprintf(w, "node %d %q parent null\n", v, dumpX(vn.x))
|
|
|
|
} else {
|
|
|
|
fmt.Fprintf(w, "node %d %q parent %d\n", v, dumpX(vn.x), vn.p)
|
|
|
|
}
|
|
|
|
|
|
|
|
t.dumpNode(w, vn.l, indent+2)
|
|
|
|
}
|
|
|
|
|
|
|
|
// dump prints a representation of the binary tree into the writer.
|
|
|
|
func (t *binTree) dump(w io.Writer) error {
|
|
|
|
bw := bufio.NewWriter(w)
|
|
|
|
t.dumpNode(bw, t.root, 0)
|
|
|
|
return bw.Flush()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *binTree) distance(v uint32) int {
|
|
|
|
dist := int(t.front) - int(v)
|
|
|
|
if dist <= 0 {
|
|
|
|
dist += len(t.node)
|
|
|
|
}
|
|
|
|
return dist
|
|
|
|
}
|
|
|
|
|
|
|
|
// matchParams bundles the settings for a single run of binTree.match.
type matchParams struct {
	// rep is passed through from NextOp; match only consults rep[0].
	rep [4]uint32
	// nAccept is the match length at which the search stops
	// immediately and the match is accepted.
	nAccept int
	// check is the maximum number of candidate distances to examine.
	check int
	// stopShorter ends the search as soon as a candidate fails to
	// reach the length of the best match so far.
	stopShorter bool
}
|
|
|
|
|
|
|
|
func (t *binTree) match(m match, distIter func() (int, bool), p matchParams,
|
|
|
|
) (r match, checked int, accepted bool) {
|
|
|
|
buf := &t.dict.buf
|
|
|
|
for {
|
|
|
|
if checked >= p.check {
|
|
|
|
return m, checked, true
|
|
|
|
}
|
|
|
|
dist, ok := distIter()
|
|
|
|
if !ok {
|
|
|
|
return m, checked, false
|
|
|
|
}
|
|
|
|
checked++
|
|
|
|
if m.n > 0 {
|
|
|
|
i := buf.rear - dist + m.n - 1
|
|
|
|
if i < 0 {
|
|
|
|
i += len(buf.data)
|
|
|
|
} else if i >= len(buf.data) {
|
|
|
|
i -= len(buf.data)
|
|
|
|
}
|
|
|
|
if buf.data[i] != t.data[m.n-1] {
|
|
|
|
if p.stopShorter {
|
|
|
|
return m, checked, false
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
n := buf.matchLen(dist, t.data)
|
|
|
|
switch n {
|
|
|
|
case 0:
|
|
|
|
if p.stopShorter {
|
|
|
|
return m, checked, false
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
case 1:
|
|
|
|
if uint32(dist-minDistance) != p.rep[0] {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if n < m.n || (n == m.n && int64(dist) >= m.distance) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
m = match{int64(dist), n}
|
|
|
|
if n >= p.nAccept {
|
|
|
|
return m, checked, true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *binTree) NextOp(rep [4]uint32) operation {
|
|
|
|
// retrieve maxMatchLen data
|
|
|
|
n, _ := t.dict.buf.Peek(t.data[:maxMatchLen])
|
|
|
|
if n == 0 {
|
|
|
|
panic("no data in buffer")
|
|
|
|
}
|
|
|
|
t.data = t.data[:n]
|
|
|
|
|
|
|
|
var (
|
|
|
|
m match
|
|
|
|
x, u, v uint32
|
|
|
|
iterPred, iterSucc func() (int, bool)
|
|
|
|
)
|
|
|
|
p := matchParams{
|
|
|
|
rep: rep,
|
|
|
|
nAccept: maxMatchLen,
|
|
|
|
check: 32,
|
|
|
|
}
|
|
|
|
i := 4
|
|
|
|
iterSmall := func() (dist int, ok bool) {
|
|
|
|
i--
|
|
|
|
if i <= 0 {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
return i, true
|
|
|
|
}
|
|
|
|
m, checked, accepted := t.match(m, iterSmall, p)
|
|
|
|
if accepted {
|
|
|
|
goto end
|
|
|
|
}
|
|
|
|
p.check -= checked
|
|
|
|
x = xval(t.data)
|
|
|
|
u, v = t.search(t.root, x)
|
|
|
|
if u == v && len(t.data) == 4 {
|
|
|
|
iter := func() (dist int, ok bool) {
|
|
|
|
if u == null {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
dist = t.distance(u)
|
|
|
|
u, v = t.search(t.node[u].l, x)
|
|
|
|
if u != v {
|
|
|
|
u = null
|
|
|
|
}
|
|
|
|
return dist, true
|
|
|
|
}
|
|
|
|
m, _, _ = t.match(m, iter, p)
|
|
|
|
goto end
|
|
|
|
}
|
|
|
|
p.stopShorter = true
|
|
|
|
iterSucc = func() (dist int, ok bool) {
|
|
|
|
if v == null {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
dist = t.distance(v)
|
|
|
|
v = t.succ(v)
|
|
|
|
return dist, true
|
|
|
|
}
|
|
|
|
m, checked, accepted = t.match(m, iterSucc, p)
|
|
|
|
if accepted {
|
|
|
|
goto end
|
|
|
|
}
|
|
|
|
p.check -= checked
|
|
|
|
iterPred = func() (dist int, ok bool) {
|
|
|
|
if u == null {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
dist = t.distance(u)
|
|
|
|
u = t.pred(u)
|
|
|
|
return dist, true
|
|
|
|
}
|
|
|
|
m, _, _ = t.match(m, iterPred, p)
|
|
|
|
end:
|
|
|
|
if m.n == 0 {
|
|
|
|
return lit{t.data[0]}
|
|
|
|
}
|
|
|
|
return m
|
|
|
|
}
|