gitea/vendor/github.com/ulikunitz/xz/lzma/decoder.go
2021-02-28 18:08:33 -05:00

278 lines
6.5 KiB
Go
Vendored

// Copyright 2014-2021 Ulrich Kunitz. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzma
import (
"errors"
"fmt"
"io"
)
// decoder decodes a raw LZMA stream without any header.
type decoder struct {
// dictionary; the rear pointer of the buffer will be used for
// reading the data.
Dict *decoderDict
// decoder state
State *state
// range decoder
rd *rangeDecoder
// start stores the head value of the dictionary for the LZMA
// stream
start int64
// size of uncompressed data
size int64
// end-of-stream encountered
eos bool
// EOS marker found
eosMarker bool
}
// newDecoder creates a new decoder instance. The parameter size provides
// the expected byte size of the decompressed data. If the size is
// unknown use a negative value. In that case the decoder will look for
// a terminating end-of-stream marker.
func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
rd, err := newRangeDecoder(br)
if err != nil {
return nil, err
}
d = &decoder{
State: state,
Dict: dict,
rd: rd,
size: size,
start: dict.pos(),
}
return d, nil
}
// Reopen restarts the decoder with a new byte reader and a new size. Reopen
// resets the Decompressed counter to zero.
func (d *decoder) Reopen(br io.ByteReader, size int64) error {
var err error
if d.rd, err = newRangeDecoder(br); err != nil {
return err
}
d.start = d.Dict.pos()
d.size = size
d.eos = false
return nil
}
// decodeLiteral decodes a single literal from the LZMA stream.
func (d *decoder) decodeLiteral() (op operation, err error) {
litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
if err != nil {
return nil, err
}
return lit{s}, nil
}
// errEOS indicates that an EOS marker has been found.
var errEOS = errors.New("EOS marker found")
// readOp decodes the next operation from the compressed stream. It
// returns the operation. If an explicit end of stream marker is
// identified the eos error is returned.
func (d *decoder) readOp() (op operation, err error) {
// Value of the end of stream (EOS) marker
const eosDist = 1<<32 - 1
state, state2, posState := d.State.states(d.Dict.head)
b, err := d.State.isMatch[state2].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
// literal
op, err := d.decodeLiteral()
if err != nil {
return nil, err
}
d.State.updateStateLiteral()
return op, nil
}
b, err = d.State.isRep[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
// simple match
d.State.rep[3], d.State.rep[2], d.State.rep[1] =
d.State.rep[2], d.State.rep[1], d.State.rep[0]
d.State.updateStateMatch()
// The length decoder returns the length offset.
n, err := d.State.lenCodec.Decode(d.rd, posState)
if err != nil {
return nil, err
}
// The dist decoder returns the distance offset. The actual
// distance is 1 higher.
d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
if err != nil {
return nil, err
}
if d.State.rep[0] == eosDist {
d.eosMarker = true
return nil, errEOS
}
op = match{n: int(n) + minMatchLen,
distance: int64(d.State.rep[0]) + minDistance}
return op, nil
}
b, err = d.State.isRepG0[state].Decode(d.rd)
if err != nil {
return nil, err
}
dist := d.State.rep[0]
if b == 0 {
// rep match 0
b, err = d.State.isRepG0Long[state2].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
d.State.updateStateShortRep()
op = match{n: 1, distance: int64(dist) + minDistance}
return op, nil
}
} else {
b, err = d.State.isRepG1[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
dist = d.State.rep[1]
} else {
b, err = d.State.isRepG2[state].Decode(d.rd)
if err != nil {
return nil, err
}
if b == 0 {
dist = d.State.rep[2]
} else {
dist = d.State.rep[3]
d.State.rep[3] = d.State.rep[2]
}
d.State.rep[2] = d.State.rep[1]
}
d.State.rep[1] = d.State.rep[0]
d.State.rep[0] = dist
}
n, err := d.State.repLenCodec.Decode(d.rd, posState)
if err != nil {
return nil, err
}
d.State.updateStateRep()
op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
return op, nil
}
// apply takes the operation and transforms the decoder dictionary accordingly.
func (d *decoder) apply(op operation) error {
var err error
switch x := op.(type) {
case match:
err = d.Dict.writeMatch(x.distance, x.n)
case lit:
err = d.Dict.WriteByte(x.b)
default:
panic("op is neither a match nor a literal")
}
return err
}
// decompress fills the dictionary unless no space for new data is
// available. If the end of the LZMA stream has been reached io.EOF will
// be returned.
func (d *decoder) decompress() error {
if d.eos {
return io.EOF
}
for d.Dict.Available() >= maxMatchLen {
op, err := d.readOp()
switch err {
case nil:
// break
case errEOS:
d.eos = true
if !d.rd.possiblyAtEnd() {
return errDataAfterEOS
}
if d.size >= 0 && d.size != d.Decompressed() {
return errSize
}
return io.EOF
case io.EOF:
d.eos = true
return io.ErrUnexpectedEOF
default:
return err
}
if err = d.apply(op); err != nil {
return err
}
if d.size >= 0 && d.Decompressed() >= d.size {
d.eos = true
if d.Decompressed() > d.size {
return errSize
}
if !d.rd.possiblyAtEnd() {
switch _, err = d.readOp(); err {
case nil:
return errSize
case io.EOF:
return io.ErrUnexpectedEOF
case errEOS:
break
default:
return err
}
}
return io.EOF
}
}
return nil
}
// Errors that may be returned while decoding data.
var (
errDataAfterEOS = errors.New("lzma: data after end of stream marker")
errSize = errors.New("lzma: wrong uncompressed data size")
)
// Read reads data from the buffer. If no more data is available io.EOF is
// returned.
func (d *decoder) Read(p []byte) (n int, err error) {
var k int
for {
// Read of decoder dict never returns an error.
k, err = d.Dict.Read(p[n:])
if err != nil {
panic(fmt.Errorf("dictionary read error %s", err))
}
if k == 0 && d.eos {
return n, io.EOF
}
n += k
if n >= len(p) {
return n, nil
}
if err = d.decompress(); err != nil && err != io.EOF {
return n, err
}
}
}
// Decompressed returns the number of bytes decompressed by the decoder.
func (d *decoder) Decompressed() int64 {
return d.Dict.pos() - d.start
}