blob: 70a2cbae5106c8e5bd64d4859bcc6dc36122a4ad [file] [log] [blame] [edit]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pkgbits
import (
"bytes"
"crypto/md5"
"encoding/binary"
"go/constant"
"io"
"math/big"
"runtime"
"strings"
)
// currentVersion is the current version number.
//
// - v0: initial prototype
//
// - v1: adds the flags uint32 word
//
// TODO(mdempsky): For the next version bump:
// - remove the legacy "has init" bool from the public root
// - remove obj's "derived func instance" bool
const currentVersion uint32 = 1
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
type PkgEncoder struct {
// elems holds the bitstream for previously encoded elements.
elems [numRelocs][]string
// stringsIdx maps previously encoded strings to their index within
// the RelocString section, to allow deduplication. That is,
// elems[RelocString][stringsIdx[s]] == s (if present).
stringsIdx map[string]Index
// syncFrames is the number of frames to write at each sync
// marker. A negative value means sync markers are omitted.
syncFrames int
}
// SyncMarkers reports whether pw uses sync markers.
func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
// NewPkgEncoder returns an initialized PkgEncoder.
//
// syncFrames is the number of caller frames that should be serialized
// at Sync points. Serializing additional frames results in larger
// export data files, but can help diagnosing desync errors in
// higher-level Unified IR reader/writer code. If syncFrames is
// negative, then sync markers are omitted entirely.
func NewPkgEncoder(syncFrames int) PkgEncoder {
return PkgEncoder{
stringsIdx: make(map[string]Index),
syncFrames: syncFrames,
}
}
// DumpTo writes the package's encoded data to out0 and returns the
// package fingerprint.
func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
h := md5.New()
out := io.MultiWriter(out0, h)
writeUint32 := func(x uint32) {
assert(binary.Write(out, binary.LittleEndian, x) == nil)
}
writeUint32(currentVersion)
var flags uint32
if pw.SyncMarkers() {
flags |= flagSyncMarkers
}
writeUint32(flags)
// Write elemEndsEnds.
var sum uint32
for _, elems := range &pw.elems {
sum += uint32(len(elems))
writeUint32(sum)
}
// Write elemEnds.
sum = 0
for _, elems := range &pw.elems {
for _, elem := range elems {
sum += uint32(len(elem))
writeUint32(sum)
}
}
// Write elemData.
for _, elems := range &pw.elems {
for _, elem := range elems {
_, err := io.WriteString(out, elem)
assert(err == nil)
}
}
// Write fingerprint.
copy(fingerprint[:], h.Sum(nil))
_, err := out0.Write(fingerprint[:])
assert(err == nil)
return
}
// StringIdx adds a string value to the strings section, if not
// already present, and returns its index.
func (pw *PkgEncoder) StringIdx(s string) Index {
if idx, ok := pw.stringsIdx[s]; ok {
assert(pw.elems[RelocString][idx] == s)
return idx
}
idx := Index(len(pw.elems[RelocString]))
pw.elems[RelocString] = append(pw.elems[RelocString], s)
pw.stringsIdx[s] = idx
return idx
}
// NewEncoder returns an Encoder for a new element within the given
// section, and encodes the given SyncMarker as the start of the
// element bitstream.
func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
e := pw.NewEncoderRaw(k)
e.Sync(marker)
return e
}
// NewEncoderRaw returns an Encoder for a new element within the given
// section.
//
// Most callers should use NewEncoder instead.
func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
idx := Index(len(pw.elems[k]))
pw.elems[k] = append(pw.elems[k], "") // placeholder
return Encoder{
p: pw,
k: k,
Idx: idx,
}
}
// An Encoder provides methods for encoding an individual element's
// bitstream data.
type Encoder struct {
p *PkgEncoder
Relocs []RelocEnt
RelocMap map[RelocEnt]uint32
Data bytes.Buffer // accumulated element bitstream data
encodingRelocHeader bool
k RelocKind
Idx Index // index within relocation section
}
// Flush finalizes the element's bitstream and returns its Index.
func (w *Encoder) Flush() Index {
var sb strings.Builder
// Backup the data so we write the relocations at the front.
var tmp bytes.Buffer
io.Copy(&tmp, &w.Data)
// TODO(mdempsky): Consider writing these out separately so they're
// easier to strip, along with function bodies, so that we can prune
// down to just the data that's relevant to go/types.
if w.encodingRelocHeader {
panic("encodingRelocHeader already true; recursive flush?")
}
w.encodingRelocHeader = true
w.Sync(SyncRelocs)
w.Len(len(w.Relocs))
for _, rEnt := range w.Relocs {
w.Sync(SyncReloc)
w.Len(int(rEnt.Kind))
w.Len(int(rEnt.Idx))
}
io.Copy(&sb, &w.Data)
io.Copy(&sb, &tmp)
w.p.elems[w.k][w.Idx] = sb.String()
return w.Idx
}
func (w *Encoder) checkErr(err error) {
if err != nil {
errorf("unexpected encoding error: %v", err)
}
}
func (w *Encoder) rawUvarint(x uint64) {
var buf [binary.MaxVarintLen64]byte
n := binary.PutUvarint(buf[:], x)
_, err := w.Data.Write(buf[:n])
w.checkErr(err)
}
func (w *Encoder) rawVarint(x int64) {
// Zig-zag encode.
ux := uint64(x) << 1
if x < 0 {
ux = ^ux
}
w.rawUvarint(ux)
}
func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
e := RelocEnt{r, idx}
if w.RelocMap != nil {
if i, ok := w.RelocMap[e]; ok {
return int(i)
}
} else {
w.RelocMap = make(map[RelocEnt]uint32)
}
i := len(w.Relocs)
w.RelocMap[e] = uint32(i)
w.Relocs = append(w.Relocs, e)
return i
}
func (w *Encoder) Sync(m SyncMarker) {
if !w.p.SyncMarkers() {
return
}
// Writing out stack frame string references requires working
// relocations, but writing out the relocations themselves involves
// sync markers. To prevent infinite recursion, we simply trim the
// stack frame for sync markers within the relocation header.
var frames []string
if !w.encodingRelocHeader && w.p.syncFrames > 0 {
pcs := make([]uintptr, w.p.syncFrames)
n := runtime.Callers(2, pcs)
frames = fmtFrames(pcs[:n]...)
}
// TODO(mdempsky): Save space by writing out stack frames as a
// linked list so we can share common stack frames.
w.rawUvarint(uint64(m))
w.rawUvarint(uint64(len(frames)))
for _, frame := range frames {
w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
}
}
// Bool encodes and writes a bool value into the element bitstream,
// and then returns the bool value.
//
// For simple, 2-alternative encodings, the idiomatic way to call Bool
// is something like:
//
// if w.Bool(x != 0) {
// // alternative #1
// } else {
// // alternative #2
// }
//
// For multi-alternative encodings, use Code instead.
func (w *Encoder) Bool(b bool) bool {
w.Sync(SyncBool)
var x byte
if b {
x = 1
}
err := w.Data.WriteByte(x)
w.checkErr(err)
return b
}
// Int64 encodes and writes an int64 value into the element bitstream.
func (w *Encoder) Int64(x int64) {
w.Sync(SyncInt64)
w.rawVarint(x)
}
// Uint64 encodes and writes a uint64 value into the element bitstream.
func (w *Encoder) Uint64(x uint64) {
w.Sync(SyncUint64)
w.rawUvarint(x)
}
// Len encodes and writes a non-negative int value into the element bitstream.
func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
// Int encodes and writes an int value into the element bitstream.
func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
// Len encodes and writes a uint value into the element bitstream.
func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
// Reloc encodes and writes a relocation for the given (section,
// index) pair into the element bitstream.
//
// Note: Only the index is formally written into the element
// bitstream, so bitstream decoders must know from context which
// section an encoded relocation refers to.
func (w *Encoder) Reloc(r RelocKind, idx Index) {
w.Sync(SyncUseReloc)
w.Len(w.rawReloc(r, idx))
}
// Code encodes and writes a Code value into the element bitstream.
func (w *Encoder) Code(c Code) {
w.Sync(c.Marker())
w.Len(c.Value())
}
// String encodes and writes a string value into the element
// bitstream.
//
// Internally, strings are deduplicated by adding them to the strings
// section (if not already present), and then writing a relocation
// into the element bitstream.
func (w *Encoder) String(s string) {
w.StringRef(w.p.StringIdx(s))
}
// StringRef writes a reference to the given index, which must be a
// previously encoded string value.
func (w *Encoder) StringRef(idx Index) {
w.Sync(SyncString)
w.Reloc(RelocString, idx)
}
// Strings encodes and writes a variable-length slice of strings into
// the element bitstream.
func (w *Encoder) Strings(ss []string) {
w.Len(len(ss))
for _, s := range ss {
w.String(s)
}
}
// Value encodes and writes a constant.Value into the element
// bitstream.
func (w *Encoder) Value(val constant.Value) {
w.Sync(SyncValue)
if w.Bool(val.Kind() == constant.Complex) {
w.scalar(constant.Real(val))
w.scalar(constant.Imag(val))
} else {
w.scalar(val)
}
}
func (w *Encoder) scalar(val constant.Value) {
switch v := constant.Val(val).(type) {
default:
errorf("unhandled %v (%v)", val, val.Kind())
case bool:
w.Code(ValBool)
w.Bool(v)
case string:
w.Code(ValString)
w.String(v)
case int64:
w.Code(ValInt64)
w.Int64(v)
case *big.Int:
w.Code(ValBigInt)
w.bigInt(v)
case *big.Rat:
w.Code(ValBigRat)
w.bigInt(v.Num())
w.bigInt(v.Denom())
case *big.Float:
w.Code(ValBigFloat)
w.bigFloat(v)
}
}
func (w *Encoder) bigInt(v *big.Int) {
b := v.Bytes()
w.String(string(b)) // TODO: More efficient encoding.
w.Bool(v.Sign() < 0)
}
func (w *Encoder) bigFloat(v *big.Float) {
b := v.Append(nil, 'p', -1)
w.String(string(b)) // TODO: More efficient encoding.
}