diff --git a/CHANGELOG.md b/CHANGELOG.md index 67c991d8..bc67a908 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,29 @@ Unreleased on master Changes here are on the master branch, but not in any tagged release yet. When a release tag is made, this block of bullet points will just slide down to the [Released Changes](#released-changes) section. -- _nothing yet :)_ +- Change: linking has been significantly reworked, and now primarily works through the `ipld.LinkSystem` type. + - This is cool, because it makes a lot of things less circuitous. Previously, working with links was a complicated combination of Loader and Storer functions, the Link interface contained the Load method, it was just... complicated to figure out where to start. Now, the answer is simple and constant: "Start with LinkSystem". Clearer to use; clearer to document; and also coincidentally a lot clearer to develop for, internally. + - `Link.Load` -> `LinkSystem.Load` (or, new: `LinkSystem.Fill`, which lets you control memory allocation more explicitly). + - `LinkBuilder.Build` -> `LinkSystem.Store`. + - `LinkSystem.ComputeLink` is a new feature that produces a Link without needing to store the data anywhere. + - The `ipld.Loader` function is now most analogous to `ipld.BlockReadOpener`. You now put it into use by assigning it to a `LinkSystem`'s `StorageReadOpener` field. + - The `ipld.Storer` function is now most analogous to `ipld.BlockWriteOpener`. You now put it into use by assigning it to a `LinkSystem`'s `StorageWriteOpener` field. + - 99% of the time, you'll probably start with `linking/cid.DefaultLinkSystem()`. You can assign to fields of this to customize it further, but it'll get you started with multihashes and multicodecs and all the behavior you expect when working with CIDs. + - (So, no -- the `cidlink` package hasn't gone anywhere. Hopefully it's a bit less obtrusive now, but it's still here.) + - The `traversal` package's `Config` struct now uses a `LinkSystem` instead of a `Loader` and `Storer` pair, as you would now probably expect. + - If you had code that was also previously passing around `Loader` and `Storer`, it's likely a similar pattern of change will be the right direction for that code. +- Change: multicodec registration is now in the `go-ipld-prime/multicodec` package. + - Previously, this registry was in the `linking/cid` package. These things are now better decoupled. + - This will require packages which register codecs to make some very small updates: e.g. `s/cidlink.RegisterMulticodecDecoder/multicodec.RegisterDecoder/`, and correspondingly, update the package imports at the top of the file. +- New: some pre-made storage options (e.g. satisfying the `ipld.StorageReadOpener` and `ipld.StorageWriteOpener` function interfaces) have appeared! Find these in the `go-ipld-prime/storage` package. + - Currently this only includes a simple in-memory storage option. This may be useful for testing and examples, but probably not much else :) + - These are mostly intended to be illustrative. You should still expect to find better storage mechanisms in other repos. +- Change: some function names in codec packages are ever-so-slightly updated. (They're verbs now, instead of nouns, which makes sense because they're functions. I have no idea what I was thinking with the previous naming. Sorry.)
+ - `s/dagjson.Decoder/dagjson.Decode/g` + - `s/dagjson.Encoder/dagjson.Encode/g` + - `s/dagcbor.Decoder/dagcbor.Decode/g` + - `s/dagcbor.Encoder/dagcbor.Encode/g` + - If you've only been using these indirectly, via their multicodec indicators, you won't have to update anything at all to account for this change. Released Changes diff --git a/codec/api.go b/codec/api.go index 511b08de..3e41f651 100644 --- a/codec/api.go +++ b/codec/api.go @@ -1,13 +1,10 @@ package codec import ( - "io" - "github.com/ipld/go-ipld-prime" ) -// Encoder is the essential definition of a function that takes IPLD Data Model data in memory and serializes it. -// IPLD Codecs are written by implementing this function interface (as well as (typically) a matched Decoder). +// Encoder is defined in the root ipld package; this alias is just for documentation and discoverability. // // Encoder functions can be composed into an ipld.LinkSystem to provide // a "one stop shop" API for handling content addressable storage. @@ -33,15 +30,12 @@ import ( // in all scenarios that use codecs indirectly. // There is also no standard interface for such configurations: by nature, // if they exist at all, they vary per codec. -type Encoder func(data ipld.Node, output io.Writer) error +type Encoder = ipld.Encoder -// Decoder is the essential definiton of a function that consumes serial data and unfurls it into IPLD Data Model-compatible in-memory representations. -// IPLD Codecs are written by implementing this function interface (as well as (typically) a matched Encoder). +// Decoder is defined in the root ipld package; this alias is just for documentation and discoverability. // -// Decoder is the dual of Encoder. -// Most of the documentation for the Encoder function interface -// also applies wholesale to the Decoder interface. -type Decoder func(into ipld.NodeAssembler, input io.Reader) error +// Most of the documentation for Encoder also applies wholesale to the Decoder interface. +type Decoder = ipld.Decoder type ErrBudgetExhausted struct{} diff --git a/codec/dagcbor/multicodec.go b/codec/dagcbor/multicodec.go index 34b822ac..85871049 100644 --- a/codec/dagcbor/multicodec.go +++ b/codec/dagcbor/multicodec.go @@ -5,23 +5,22 @@ import ( "github.com/polydawn/refmt/cbor" - ipld "github.com/ipld/go-ipld-prime" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/multicodec" ) var ( - _ cidlink.MulticodecDecoder = Decoder - _ cidlink.MulticodecEncoder = Encoder + _ ipld.Decoder = Decode + _ ipld.Encoder = Encode ) func init() { - cidlink.RegisterMulticodecDecoder(0x71, Decoder) - cidlink.RegisterMulticodecEncoder(0x71, Encoder) + multicodec.RegisterEncoder(0x71, Encode) + multicodec.RegisterDecoder(0x71, Decode) } -func Decoder(na ipld.NodeAssembler, r io.Reader) error { +func Decode(na ipld.NodeAssembler, r io.Reader) error { // Probe for a builtin fast path. Shortcut to that if possible. - // (ipldcbor.NodeBuilder supports this, for example.) type detectFastPath interface { DecodeDagCbor(io.Reader) error } @@ -32,9 +31,8 @@ func Decoder(na ipld.NodeAssembler, r io.Reader) error { return Unmarshal(na, cbor.NewDecoder(cbor.DecodeOptions{}, r)) } -func Encoder(n ipld.Node, w io.Writer) error { +func Encode(n ipld.Node, w io.Writer) error { // Probe for a builtin fast path. Shortcut to that if possible. - // (ipldcbor.Node supports this, for example.)
type detectFastPath interface { EncodeDagCbor(io.Writer) error } diff --git a/codec/dagcbor/roundtripCidlink_test.go b/codec/dagcbor/roundtripCidlink_test.go index 42edb38d..8839c393 100644 --- a/codec/dagcbor/roundtripCidlink_test.go +++ b/codec/dagcbor/roundtripCidlink_test.go @@ -2,7 +2,6 @@ package dagcbor import ( "bytes" - "context" "io" "testing" @@ -15,27 +14,26 @@ import ( ) func TestRoundtripCidlink(t *testing.T) { - lb := cidlink.LinkBuilder{cid.Prefix{ + lp := cidlink.LinkPrototype{cid.Prefix{ Version: 1, Codec: 0x71, - MhType: 0x17, + MhType: 0x13, MhLength: 4, }} + lsys := cidlink.DefaultLinkSystem() buf := bytes.Buffer{} - lnk, err := lb.Build(context.Background(), ipld.LinkContext{}, n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return &buf, func(lnk ipld.Link) error { return nil }, nil - }, - ) + lsys.StorageWriteOpener = func(lnkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + return &buf, func(lnk ipld.Link) error { return nil }, nil + } + lsys.StorageReadOpener = func(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { + return bytes.NewReader(buf.Bytes()), nil + } + + lnk, err := lsys.Store(ipld.LinkContext{}, lp, n) Require(t, err, ShouldEqual, nil) - nb := basicnode.Prototype__Any{}.NewBuilder() - err = lnk.Load(context.Background(), ipld.LinkContext{}, nb, - func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(buf.Bytes()), nil - }, - ) + n2, err := lsys.Load(ipld.LinkContext{}, lnk, basicnode.Prototype.Any) Require(t, err, ShouldEqual, nil) - Wish(t, nb.Build(), ShouldEqual, n) + Wish(t, n2, ShouldEqual, n) } diff --git a/codec/dagcbor/roundtrip_test.go b/codec/dagcbor/roundtrip_test.go index 7479ff8f..400db482 100644 --- a/codec/dagcbor/roundtrip_test.go +++ b/codec/dagcbor/roundtrip_test.go @@ -2,16 +2,13 @@ package dagcbor import ( "bytes" - "context" "crypto/rand" - "io" "strings" "testing" cid "github.com/ipfs/go-cid" . 
"github.com/warpfork/go-wish" - ipld "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/fluent" cidlink "github.com/ipld/go-ipld-prime/linking/cid" basicnode "github.com/ipld/go-ipld-prime/node/basic" @@ -38,14 +35,14 @@ var serial = "\xa4eplainkolde stringcmap\xa2cone\x01ctwo\x02dlist\x82ethreedfour func TestRoundtrip(t *testing.T) { t.Run("encoding", func(t *testing.T) { var buf bytes.Buffer - err := Encoder(n, &buf) + err := Encode(n, &buf) Require(t, err, ShouldEqual, nil) Wish(t, buf.String(), ShouldEqual, serial) }) t.Run("decoding", func(t *testing.T) { buf := strings.NewReader(serial) nb := basicnode.Prototype__Map{}.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) Wish(t, nb.Build(), ShouldEqual, n) }) @@ -57,33 +54,26 @@ func TestRoundtripScalar(t *testing.T) { simple := nb.Build() t.Run("encoding", func(t *testing.T) { var buf bytes.Buffer - err := Encoder(simple, &buf) + err := Encode(simple, &buf) Require(t, err, ShouldEqual, nil) Wish(t, buf.String(), ShouldEqual, `japplesauce`) }) t.Run("decoding", func(t *testing.T) { buf := strings.NewReader(`japplesauce`) nb := basicnode.Prototype__String{}.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) Wish(t, nb.Build(), ShouldEqual, simple) }) } func TestRoundtripLinksAndBytes(t *testing.T) { - lb := cidlink.LinkBuilder{cid.Prefix{ + lnk := cidlink.LinkPrototype{cid.Prefix{ Version: 1, Codec: 0x71, - MhType: 0x17, + MhType: 0x13, MhLength: 4, - }} - buf := bytes.Buffer{} - lnk, err := lb.Build(context.Background(), ipld.LinkContext{}, n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return &buf, func(lnk ipld.Link) error { return nil }, nil - }, - ) - Require(t, err, ShouldEqual, nil) + }}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test. var linkByteNode = fluent.MustBuildMap(basicnode.Prototype__Map{}, 4, func(na fluent.MapAssembler) { nva := na.AssembleEntry("Link") @@ -94,11 +84,11 @@ func TestRoundtripLinksAndBytes(t *testing.T) { nva.AssignBytes(bytes) }) - buf.Reset() - err = Encoder(linkByteNode, &buf) + buf := bytes.Buffer{} + err := Encode(linkByteNode, &buf) Require(t, err, ShouldEqual, nil) nb := basicnode.Prototype__Map{}.NewBuilder() - err = Decoder(nb, &buf) + err = Decode(nb, &buf) Require(t, err, ShouldEqual, nil) reconstructed := nb.Build() Wish(t, reconstructed, ShouldEqual, linkByteNode) diff --git a/codec/dagcbor/unmarshal_test.go b/codec/dagcbor/unmarshal_test.go index bfbe6025..c373c341 100644 --- a/codec/dagcbor/unmarshal_test.go +++ b/codec/dagcbor/unmarshal_test.go @@ -14,28 +14,28 @@ func TestFunBlocks(t *testing.T) { // This fixture has a zero length link -- not even the multibase byte (which dag-cbor insists must be zero) is there. buf := strings.NewReader("\x8d\x8d\x97\xd8*@") nb := basicnode.Prototype.Any.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, ErrInvalidMultibase) }) t.Run("fuzz001", func(t *testing.T) { // This fixture might cause an overly large allocation if you aren't careful to have resource budgets. buf := strings.NewReader("\x9a\xff000") nb := basicnode.Prototype.Any.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, ErrAllocationBudgetExceeded) }) t.Run("fuzz002", func(t *testing.T) { // This fixture might cause an overly large allocation if you aren't careful to have resource budgets. 
buf := strings.NewReader("\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9f\x9a\xff000") nb := basicnode.Prototype.Any.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, ErrAllocationBudgetExceeded) }) t.Run("fuzz003", func(t *testing.T) { // This fixture might cause an overly large allocation if you aren't careful to have resource budgets. buf := strings.NewReader("\x9f\x9f\x9f\x9f\x9f\x9f\x9f\xbb00000000") nb := basicnode.Prototype.Any.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, ErrAllocationBudgetExceeded) }) } diff --git a/codec/dagjson/multicodec.go b/codec/dagjson/multicodec.go index e54d3aff..f0b693ad 100644 --- a/codec/dagjson/multicodec.go +++ b/codec/dagjson/multicodec.go @@ -6,21 +6,21 @@ import ( "github.com/polydawn/refmt/json" - ipld "github.com/ipld/go-ipld-prime" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/multicodec" ) var ( - _ cidlink.MulticodecDecoder = Decoder - _ cidlink.MulticodecEncoder = Encoder + _ ipld.Decoder = Decode + _ ipld.Encoder = Encode ) func init() { - cidlink.RegisterMulticodecDecoder(0x0129, Decoder) - cidlink.RegisterMulticodecEncoder(0x0129, Encoder) + multicodec.RegisterEncoder(0x0129, Encode) + multicodec.RegisterDecoder(0x0129, Decode) } -func Decoder(na ipld.NodeAssembler, r io.Reader) error { +func Decode(na ipld.NodeAssembler, r io.Reader) error { // Shell out directly to generic builder path. // (There's not really any fastpaths of note for json.) err := Unmarshal(na, json.NewDecoder(r)) @@ -52,7 +52,7 @@ func Decoder(na ipld.NodeAssembler, r io.Reader) error { return err } -func Encoder(n ipld.Node, w io.Writer) error { +func Encode(n ipld.Node, w io.Writer) error { // Shell out directly to generic inspection path. // (There's not really any fastpaths of note for json.) // Write another function if you need to tune encoding options about whitespace. 
diff --git a/codec/dagjson/roundtripCidlink_test.go b/codec/dagjson/roundtripCidlink_test.go index 58b894a8..43c63bb0 100644 --- a/codec/dagjson/roundtripCidlink_test.go +++ b/codec/dagjson/roundtripCidlink_test.go @@ -2,9 +2,7 @@ package dagjson import ( "bytes" - "context" "io" - "io/ioutil" "strings" "testing" @@ -17,29 +15,28 @@ import ( ) func TestRoundtripCidlink(t *testing.T) { - lb := cidlink.LinkBuilder{cid.Prefix{ + lp := cidlink.LinkPrototype{cid.Prefix{ Version: 1, Codec: 0x0129, - MhType: 0x17, + MhType: 0x13, MhLength: 4, }} + lsys := cidlink.DefaultLinkSystem() buf := bytes.Buffer{} - lnk, err := lb.Build(context.Background(), ipld.LinkContext{}, n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return &buf, func(lnk ipld.Link) error { return nil }, nil - }, - ) + lsys.StorageWriteOpener = func(lnkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + return &buf, func(lnk ipld.Link) error { return nil }, nil + } + lsys.StorageReadOpener = func(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { + return bytes.NewReader(buf.Bytes()), nil + } + + lnk, err := lsys.Store(ipld.LinkContext{}, lp, n) Require(t, err, ShouldEqual, nil) - nb := basicnode.Prototype__Any{}.NewBuilder() - err = lnk.Load(context.Background(), ipld.LinkContext{}, nb, - func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(buf.Bytes()), nil - }, - ) + n2, err := lsys.Load(ipld.LinkContext{}, lnk, basicnode.Prototype.Any) Require(t, err, ShouldEqual, nil) - Wish(t, nb.Build(), ShouldEqual, n) + Wish(t, n2, ShouldEqual, n) } // Make sure that a map that *almost* looks like a link is handled safely. @@ -48,24 +45,19 @@ func TestRoundtripCidlink(t *testing.T) { // tokens have to be reprocessed before a recursion that find a real link appears. func TestUnmarshalTrickyMapContainingLink(t *testing.T) { // Create a link; don't particularly care about its contents. - lnk, err := cidlink.LinkBuilder{cid.Prefix{ + lnk := cidlink.LinkPrototype{cid.Prefix{ Version: 1, - Codec: 0x0129, - MhType: 0x17, + Codec: 0x71, + MhType: 0x13, MhLength: 4, - }}.Build(context.Background(), ipld.LinkContext{}, n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return ioutil.Discard, func(lnk ipld.Link) error { return nil }, nil - }, - ) - Require(t, err, ShouldEqual, nil) + }}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test. // Compose the tricky corpus. (lnk.String "happens" to work here, although this isn't recommended or correct in general.) tricky := `{"/":{"/":"` + lnk.String() + `"}}` // Unmarshal. Hopefully we get a map with a link in it. 
nb := basicnode.Prototype__Any{}.NewBuilder() - err = Decoder(nb, strings.NewReader(tricky)) + err := Decode(nb, strings.NewReader(tricky)) Require(t, err, ShouldEqual, nil) n := nb.Build() Wish(t, n.Kind(), ShouldEqual, ipld.Kind_Map) diff --git a/codec/dagjson/roundtrip_test.go b/codec/dagjson/roundtrip_test.go index 6e02a4f7..757fc6c2 100644 --- a/codec/dagjson/roundtrip_test.go +++ b/codec/dagjson/roundtrip_test.go @@ -48,14 +48,14 @@ var serial = `{ func TestRoundtrip(t *testing.T) { t.Run("encoding", func(t *testing.T) { var buf bytes.Buffer - err := Encoder(n, &buf) + err := Encode(n, &buf) Require(t, err, ShouldEqual, nil) Wish(t, buf.String(), ShouldEqual, serial) }) t.Run("decoding", func(t *testing.T) { buf := strings.NewReader(serial) nb := basicnode.Prototype__Map{}.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) Wish(t, nb.Build(), ShouldEqual, n) }) @@ -67,14 +67,14 @@ func TestRoundtripScalar(t *testing.T) { simple := nb.Build() t.Run("encoding", func(t *testing.T) { var buf bytes.Buffer - err := Encoder(simple, &buf) + err := Encode(simple, &buf) Require(t, err, ShouldEqual, nil) Wish(t, buf.String(), ShouldEqual, `"applesauce"`) }) t.Run("decoding", func(t *testing.T) { buf := strings.NewReader(`"applesauce"`) nb := basicnode.Prototype__String{}.NewBuilder() - err := Decoder(nb, buf) + err := Decode(nb, buf) Require(t, err, ShouldEqual, nil) Wish(t, nb.Build(), ShouldEqual, simple) }) diff --git a/codec/jst/demo/main.go b/codec/jst/demo/main.go index be95ee65..a90f14ff 100644 --- a/codec/jst/demo/main.go +++ b/codec/jst/demo/main.go @@ -23,7 +23,7 @@ func main() { ]} ]` nb := basicnode.Prototype.Any.NewBuilder() - if err := dagjson.Decoder(nb, strings.NewReader(fixture)); err != nil { + if err := dagjson.Decode(nb, strings.NewReader(fixture)); err != nil { panic(err) } n := nb.Build() diff --git a/codec/jst/jst_test.go b/codec/jst/jst_test.go index 4eac70ec..d17c45fe 100644 --- a/codec/jst/jst_test.go +++ b/codec/jst/jst_test.go @@ -19,7 +19,7 @@ func TestSimple(t *testing.T) { {"path": "./quxx", "moduleName": "example.net/quxx", "status": "lit"} ]`) nb := basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() st := state{} @@ -48,7 +48,7 @@ func TestAbsentColumn(t *testing.T) { {"path": "./quxx", "optionalColumn": "wicked", "status": "lit"} ]`) nb := basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() var buf bytes.Buffer @@ -63,7 +63,7 @@ func TestAbsentColumn(t *testing.T) { {"path": "./quxx", "status": "lit", "optionalColumn": "wicked"} ]`) nb := basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() var buf bytes.Buffer @@ -88,7 +88,7 @@ func TestSubTables(t *testing.T) { {"path": "./quxx", "moduleName": "example.net/quxx", "status": "lit"} ]`) nb := basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() var buf bytes.Buffer @@ -112,7 +112,7 @@ func TestSubTablesCorrelated(t *testing.T) { ]} ]`) nb 
:= basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() var buf bytes.Buffer @@ -144,7 +144,7 @@ func TestSubSubTables(t *testing.T) { ]} ]`) nb := basicnode.Prototype.Any.NewBuilder() - Require(t, dagjson.Decoder(nb, strings.NewReader(fixture)), ShouldEqual, nil) + Require(t, dagjson.Decode(nb, strings.NewReader(fixture)), ShouldEqual, nil) n := nb.Build() var buf bytes.Buffer diff --git a/codec/raw/codec.go b/codec/raw/codec.go index 126a573b..1cb41daf 100644 --- a/codec/raw/codec.go +++ b/codec/raw/codec.go @@ -12,15 +12,20 @@ import ( "io/ioutil" ipld "github.com/ipld/go-ipld-prime" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/multicodec" ) // TODO(mvdan): make go-ipld-prime use go-multicodec soon const rawMulticodec = 0x55 +var ( + _ ipld.Decoder = Decode + _ ipld.Encoder = Encode +) + func init() { - cidlink.RegisterMulticodecDecoder(rawMulticodec, Decode) - cidlink.RegisterMulticodecEncoder(rawMulticodec, Encode) + multicodec.RegisterEncoder(rawMulticodec, Encode) + multicodec.RegisterDecoder(rawMulticodec, Decode) } // Decode implements decoding of a node with the raw codec. diff --git a/codec/raw/codec_test.go b/codec/raw/codec_test.go index accc09f3..215c8ea9 100644 --- a/codec/raw/codec_test.go +++ b/codec/raw/codec_test.go @@ -2,7 +2,6 @@ package raw import ( "bytes" - "context" "fmt" "io" "testing" @@ -48,30 +47,30 @@ func TestRoundtrip(t *testing.T) { func TestRoundtripCidlink(t *testing.T) { t.Parallel() - lb := cidlink.LinkBuilder{Prefix: cid.Prefix{ + lp := cidlink.LinkPrototype{Prefix: cid.Prefix{ Version: 1, Codec: rawMulticodec, - MhType: 0x17, + MhType: 0x13, MhLength: 4, }} node := basicnode.NewBytes([]byte("hello there")) + lsys := cidlink.DefaultLinkSystem() + buf := bytes.Buffer{} - lnk, err := lb.Build(context.Background(), ipld.LinkContext{}, node, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return &buf, func(lnk ipld.Link) error { return nil }, nil - }, - ) + lsys.StorageWriteOpener = func(lnkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + return &buf, func(lnk ipld.Link) error { return nil }, nil + } + lsys.StorageReadOpener = func(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { + return bytes.NewReader(buf.Bytes()), nil + } + lnk, err := lsys.Store(ipld.LinkContext{}, lp, node) + qt.Assert(t, err, qt.IsNil) - nb := basicnode.Prototype__Any{}.NewBuilder() - err = lnk.Load(context.Background(), ipld.LinkContext{}, nb, - func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(buf.Bytes()), nil - }, - ) + newNode, err := lsys.Load(ipld.LinkContext{}, lnk, basicnode.Prototype__Any{}) qt.Assert(t, err, qt.IsNil) - qt.Assert(t, nb.Build(), qt.DeepEquals, node) + qt.Assert(t, newNode, qt.DeepEquals, node) } // mustOnlyUseRead only exposes Read, hiding Bytes. diff --git a/errors.go b/errors.go index ed600ae6..2376d94b 100644 --- a/errors.go +++ b/errors.go @@ -123,6 +123,18 @@ func (e ErrInvalidSegmentForList) Error() string { return v + fmt.Sprintf(": %q: %s", e.TroubleSegment.s, e.Reason) } +// ErrHashMismatch is the error returned when loading data and verifying its hash +// and finding that the loaded data doesn't re-hash to the expected value. +// It is typically seen returned by functions like LinkSystem.Load or LinkSystem.Fill. 
+type ErrHashMismatch struct { + Actual Link + Expected Link +} + +func (e ErrHashMismatch) Error() string { + return fmt.Sprintf("hash mismatch! %v (actual) != %v (expected)", e.Actual, e.Expected) +} + // ErrUnmatchable is the error raised when processing data with IPLD Schemas and // finding data which cannot be matched into the schema. // It will be returned by NodeAssemblers and NodeBuilders when they are fed unmatchable data. diff --git a/exampleLinking_test.go b/exampleLinking_test.go deleted file mode 100644 index db75799e..00000000 --- a/exampleLinking_test.go +++ /dev/null @@ -1,117 +0,0 @@ -package ipld_test - -import ( - "bytes" - "context" - "fmt" - "io" - - "github.com/ipfs/go-cid" - - ipld "github.com/ipld/go-ipld-prime" - _ "github.com/ipld/go-ipld-prime/codec/dagcbor" - "github.com/ipld/go-ipld-prime/fluent" - cidlink "github.com/ipld/go-ipld-prime/linking/cid" - basicnode "github.com/ipld/go-ipld-prime/node/basic" -) - -// storage is a map where we'll store serialized IPLD data. -// -// ExampleCreatingLink will put data into this; -// ExampleLoadingLink will read out from it. -// -// In a real program, you'll probably make functions to load and store from disk, -// or some network storage, or... whatever you want, really :) -var storage = make(map[ipld.Link][]byte) - -func ExampleCreatingLink() { - // Creating a link is done by choosing a concrete link implementation (typically, CID), - // importing that package, and using its functions to create the link. - - // First, create a LinkBuilder. This gathers together any parameters that might be needed when making a link. - // (For CIDs, the version, the codec, and the multihash type are all parameters we'll need.) - lb := cidlink.LinkBuilder{cid.Prefix{ - Version: 1, // Usually '1'. - Codec: 0x71, // 0x71 means "dag-cbor" -- See the multicodecs table: https://github.com/multiformats/multicodec/ - MhType: 0x15, // 0x15 means "sha3-384" -- See the multicodecs table: https://github.com/multiformats/multicodec/ - MhLength: 48, // sha3-224 hash has a 48-byte sum. - }} - - // And we need some data to link to! Here's a quick piece of example data: - n := fluent.MustBuildMap(basicnode.Prototype.Map, 1, func(na fluent.MapAssembler) { - na.AssembleEntry("hello").AssignString("world") - }) - - // Building a link takes a bunch of arguments: - // - a `context.Context` -- this is a standard way to support cancellability in long-running tasks in golang. - // (Hashing to form a link is fast -- but you might be writing to a slow storage medium at the same time.) - // - an `ipld.LinkContext` -- this can provide additional info (like a path -- the traversal package will do this), but can also be empty. - // - the `ipld.Node` to serialize and create the link for! - // - an `ipld.Storer` -- this is a function that defines where the serialized Node is written to. - lnk, err := lb.Build( - context.Background(), - ipld.LinkContext{}, - n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - buf := bytes.Buffer{} - return &buf, func(lnk ipld.Link) error { - storage[lnk] = buf.Bytes() - return nil - }, nil - }, - ) - if err != nil { - panic(err) - } - - // That's it! We got a link. - fmt.Printf("link: %s\n", lnk) - fmt.Printf("concrete type: `%T`\n", lnk) - - // Output: - // link: bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta - // concrete type: `cidlink.Link` -} - -func ExampleLoadingLink() { - // Let's say we want to load this link (it's the same one we just created in the example above). 
- cid, _ := cid.Decode("bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta") - lnk := cidlink.Link{cid} - - // First, we'll need a Loader. This function has to take a link as a parameter, - // then decides where to get the referenced raw data from, - // and returns that as a standard `io.Reader`. - var loader ipld.Loader = func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - } - - // Second, we'll need to decide what in-memory implementation of ipld.Node we want to use. - // Here, we'll use the "basicnode" implementation. - // But you could also use other implementations, or even a code-generated type with special features! - // To encapsulate this decision, we create a NodeBuilder for the implementation we want. - // (If you are building a library and want to expose this choice, though, you'd probably want to accept a NodePrototype as the configuration for this.) - nb := basicnode.Prototype.Any.NewBuilder() - - // Tell the link to load itself! - // This only returns an error... - // the data itself gets loaded into the NodeBuilder. - // (It's kinda like how if you use stdlib's `json.Unmarshal`, you give it `&something` as a parameter, and it fills that in.) - err := lnk.Load( - context.Background(), // As with creating links, a context here is so you can interrupt the process if it's slow. - ipld.LinkContext{}, // The LinkContext can provide more info, but it's also fine if it's empty. - nb, // Here's the NodeBuilder we'll pour the unmarshalled data into. - loader, // The loader is called to get the io.Reader for the raw data. - ) - if err != nil { - panic(err) - } - - // We can get the reified data from the NodeBuilder: - n := nb.Build() - - // Tada! We have the data as node that we can traverse and use as desired. - fmt.Printf("we loaded a %s with %d entries\n", n.Kind(), n.Length()) - - // Output: - // we loaded a map with 1 entries -} diff --git a/examples_test.go b/examples_test.go index 54a43662..264d2fab 100644 --- a/examples_test.go +++ b/examples_test.go @@ -21,7 +21,7 @@ func ExampleCreateDataAndMarshal() { ma.Finish() // Call 'Finish' on the map assembly to let it know no more data is coming. n := nb.Build() // Call 'Build' to get the resulting Node. (It's immutable!) - dagjson.Encoder(n, os.Stdout) + dagjson.Encode(n, os.Stdout) // Output: // { @@ -35,7 +35,7 @@ func ExampleUnmarshalData() { np := basicnode.Prototype.Any // Pick a stle for the in-memory data. nb := np.NewBuilder() // Create a builder. - dagjson.Decoder(nb, serial) // Hand the builder to decoding -- decoding will fill it in! + dagjson.Decode(nb, serial) // Hand the builder to decoding -- decoding will fill it in! n := nb.Build() // Call 'Build' to get the resulting Node. (It's immutable!) 
fmt.Printf("the data decoded was a %s kind\n", n.Kind()) diff --git a/fluent/bench_test.go b/fluent/bench_test.go index b0db42eb..b32c533a 100644 --- a/fluent/bench_test.go +++ b/fluent/bench_test.go @@ -155,7 +155,7 @@ func BenchmarkUnmarshal(b *testing.B) { r := strings.NewReader(serial) for i := 0; i < b.N; i++ { nb := basicnode.Prototype.Any.NewBuilder() - err = dagjson.Decoder(nb, r) + err = dagjson.Decode(nb, r) n = nb.Build() r.Reset(serial) } diff --git a/fluent/qp/example_test.go b/fluent/qp/example_test.go index e8a4632f..3e0eba9f 100644 --- a/fluent/qp/example_test.go +++ b/fluent/qp/example_test.go @@ -27,7 +27,7 @@ func Example() { if err != nil { panic(err) } - dagjson.Encoder(n, os.Stdout) + dagjson.Encode(n, os.Stdout) // Output: // { diff --git a/fluent/quip/quip_example_test.go b/fluent/quip/quip_example_test.go index 986ed736..132b88c2 100644 --- a/fluent/quip/quip_example_test.go +++ b/fluent/quip/quip_example_test.go @@ -30,7 +30,7 @@ func Example() { if err != nil { panic(err) } - dagjson.Encoder(n, os.Stdout) + dagjson.Encode(n, os.Stdout) // Output: // { diff --git a/go.mod b/go.mod index b42b7849..7704ed54 100644 --- a/go.mod +++ b/go.mod @@ -5,11 +5,8 @@ go 1.14 require ( github.com/frankban/quicktest v1.11.3 github.com/ipfs/go-cid v0.0.4 - github.com/minio/sha256-simd v0.1.1 // indirect - github.com/mr-tron/base58 v1.1.3 // indirect + github.com/multiformats/go-multihash v0.0.15 github.com/polydawn/refmt v0.0.0-20190807091052-3d65705ee9f1 github.com/smartystreets/goconvey v1.6.4 // indirect github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a - golang.org/x/crypto v0.0.0-20200117160349-530e935923ad // indirect - golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 // indirect ) diff --git a/go.sum b/go.sum index 25c92d9e..bc294e32 100644 --- a/go.sum +++ b/go.sum @@ -8,6 +8,8 @@ github.com/ipfs/go-cid v0.0.4 h1:UlfXKrZx1DjZoBhQHmNHLC1fK1dUJDN20Y28A7s+gJ8= github.com/ipfs/go-cid v0.0.4/go.mod h1:4LLaPOQwmk5z9LBgQnpkivrx8BJjUyGwTXCd5Xfj6+M= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/klauspost/cpuid/v2 v2.0.4 h1:g0I61F2K2DjRHz1cnxlkNSBIaePVoJIjjnHui8QHbiw= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= @@ -16,18 +18,22 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g= github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= -github.com/minio/sha256-simd v0.1.1 h1:5QHSlgo3nt5yKOJrC7W8w7X+NFl8cMPZm96iu8kKUJU= -github.com/minio/sha256-simd v0.1.1/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8= github.com/mr-tron/base58 v1.1.2/go.mod 
h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= -github.com/mr-tron/base58 v1.1.3 h1:v+sk57XuaCKGXpWtVBX8YJzO7hMGx4Aajh4TQbdEFdc= -github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI= github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA= github.com/multiformats/go-multibase v0.0.1 h1:PN9/v21eLywrFWdFNsFKaU04kLJzuYzmrJR+ubhT9qA= github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= github.com/multiformats/go-multihash v0.0.10 h1:lMoNbh2Ssd9PUF74Nz008KGzGPlfeV6wH3rit5IIGCM= github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= +github.com/multiformats/go-multihash v0.0.15 h1:hWOPdrNqDjwHDx82vsYGSDZNyktOJJ2dzZJzFkOV1jM= +github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg= +github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/polydawn/refmt v0.0.0-20190807091052-3d65705ee9f1 h1:CskT+S6Ay54OwxBGB0R3Rsx4Muto6UnEYTyKJbyRIAI= github.com/polydawn/refmt v0.0.0-20190807091052-3d65705ee9f1/go.mod h1:uIp+gprXxxrWSjjklXD+mN4wed/tMfjMMmN/9+JsA9o= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= @@ -40,14 +46,16 @@ github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a h1:G++j5e0OC488te github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200117160349-530e935923ad h1:Jh8cai0fqIK+f6nG0UgPW5wFk8wmiMhM3AyciDBdtQg= -golang.org/x/crypto v0.0.0-20200117160349-530e935923ad/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83 h1:/ZScEX8SfEmUGRHs0gxpqteO5nfNW6axyZbBdw9A12g= +golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU= -golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY= +golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term 
v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= diff --git a/link.go b/link.go new file mode 100644 index 00000000..104fbbd0 --- /dev/null +++ b/link.go @@ -0,0 +1,99 @@ +package ipld + +import ( + "context" +) + +// Link is a special kind of value in IPLD which can be "loaded" to access more nodes. +// +// Nodes can be a Link: "link" is one of the kinds in the IPLD Data Model; +// and accordingly there is an `ipld.Kind_Link` enum value, and Node has an `AsLink` method. +// +// Links are considered a scalar value in the IPLD Data Model, +// but when "loaded", the result can be any other IPLD kind: +// maps, lists, strings, etc. +// +// Link is an interface in the go-ipld-prime implementation, +// but the most common instantiation of it comes from the `linking/cid` package, +// and represents CIDs (see https://github.com/multiformats/cid). +// +// The Link interface says very little by itself; it's generally necessary to +// use type assertions to unpack more specific forms of data. +// The only real contract is that the Link must be able to return a LinkPrototype, +// which must be able to produce new Link values of a similar form. +// (In practice: if you're familiar with CIDs: Link.Prototype is analogous to cid.Prefix.) +// +// The traversal package contains powerful features for walking through large graphs of Nodes +// while automatically loading and traversing links as the walk goes. +// +// Note that the Link interface should typically be inhabited by a struct or string, as opposed to a pointer. +// This is because Link is often desirable to be able to use as a golang map key, +// and in that context, pointers would not result in the desired behavior. +type Link interface { + // Prototype should return a LinkPrototype which carries the information + // to make more Link values similar to this one (but with different hashes). + Prototype() LinkPrototype + + // String should return a reasonably human-readable debug-friendly representation of the Link. + // There is no contract that requires that the string be able to be parsed back into a Link value, + // but the string should be unique (e.g. not elide any parts of the hash). + String() string +} + +// LinkPrototype encapsulates any implementation details and parameters +// necessary for creating a Link, except for the hash result itself. +// +// LinkPrototype, like Link, is an interface in go-ipld-prime, +// but the most common instantiation of it comes from the `linking/cid` package, +// and represents CIDs (see https://github.com/multiformats/cid). +// If using CIDs as an implementation, LinkPrototype will encapsulate information +// like multihashType, multicodecType, and cidVersion, for example. +// (LinkPrototype is analogous to cid.Prefix.) +type LinkPrototype interface { + // BuildLink should return a new Link value based on the given hashsum. + // The hashsum argument should typically be a value returned from a + // https://golang.org/pkg/hash/#Hash.Sum call. + // + // The hashsum reference must not be retained (the caller is free to reuse it).
+ BuildLink(hashsum []byte) Link +} + +// LinkContext is a structure carrying ancillary information that may be used +// while loading or storing data -- see its usage in BlockReadOpener, BlockWriteOpener, +// and in the methods on LinkSystem which handle loading and storing data. +// +// A zero value for LinkContext is generally acceptable in any functions that use it. +// In this case, any operations that need a Context will use Context.Background +// (thus being uncancellable) and simply have no additional information to work with. +type LinkContext struct { + // Ctx is the familiar golang Context pattern. + // Use this for cancellation, or attaching additional info + // (for example, perhaps to pass auth tokens through to the storage functions). + Ctx context.Context + + // Path where the link was encountered. May be zero. + // + // Functions in the traversal package will set this automatically. + LinkPath Path + + // When traversing data or encoding: the Node containing the link -- + // it may have additional type info, etc, that can be accessed. + // When building / decoding: not present. + // + // Functions in the traversal package will set this automatically. + LinkNode Node + + // When building data or decoding: the NodeAssembler that will be receiving the link -- + // it may have additional type info, etc, that can be accessed. + // When traversing / encoding: not present. + // + // Functions in the traversal package will set this automatically. + LinkNodeAssembler NodeAssembler + + // Parent of the LinkNode. May be zero. + // + // Functions in the traversal package will set this automatically. + ParentNode Node + + // REVIEW: ParentNode in LinkContext -- so far, this has only ever been hypothetically useful. Keep or drop? +} diff --git a/linking.go b/linking.go index efb007b6..658e2622 100644 --- a/linking.go +++ b/linking.go @@ -5,144 +5,165 @@ import ( "io" ) -// Link is a special kind of value in IPLD which can be "loaded" to access -// more nodes. -// -// Nodes can return a Link; this can be loaded manually, or, -// the traversal package contains powerful features for automatically -// traversing links through large trees of nodes. -// -// Links straddle somewhat awkwardly across the IPLD Layer Model: -// clearly not at the Schema layer (though schemas can define their parameters), -// partially at the Data Model layer (as they're recognizably in the Node interface), -// and also involved at some serial layer that we don't often talk about: -// linking -- since we're a content-addressed system at heart -- necessarily -// involves understanding of concrete serialization details: -// which encoding mechanisms to use, what string escaping, what hashing, etc, -// and indeed what concrete serial link representation itself to use. -// -// Link is an abstract interface so that we can describe Nodes without -// getting stuck on specific details of any link representation. -// In practice, you'll almost certainly use CIDs for linking. -// However, it's possible to bring your own Link implementations -// (though this'll almost certainly involve also bringing your own encoding -// systems; it's a lot of work). -// It's even possible to use IPLD *entirely without* any linking implementation, -// using it purely for json/cbor via the encoding packages and -// foregoing the advanced traversal features around transparent link loading. -// -// Link interfaces are usually inhabited by a struct or string or etc, and not a pointer.
-// This is because Link is often desirable to be able to use as a golang map key, -// and in that context, pointers would not result in the desired behavior. -type Link interface { - // Load consumes serial data from a Loader and funnels the parsed - // data into a NodeAssembler. - // - // The provided Loader function is used to get a reader for the raw - // serialized content; the Link contains an understanding of how to - // select a decoder (and hasher for verification, etc); and the - // NodeAssembler accumulates the final results (which you can - // presumably access from elsewhere; Load is designed not to know - // about this). - Load(context.Context, LinkContext, NodeAssembler, Loader) error +// This file contains all the functions on LinkSystem. +// These are the helpful, user-facing functions we expect folks to use "most of the time" when loading and storing data. - // LinkBuilder returns a handle to any parameters of the Link which - // are needed to create a new Link of the same style but with new content. - // (It's much like the relationship of Node/NodeBuilder.) - // - // (If you're familiar with CIDs, you can think of this method as - // corresponding closely to `cid.Prefix()`, just more abstractly.) - LinkBuilder() LinkBuilder +// Variations: +// - Load vs Store vs ComputeLink +// - With or without LinkContext? +// - Brevity would be nice but I can't think of what to name the functions, so: everything takes LinkContext. Zero value is fine though. +// - [for load direction only]: Prototype (and return Node|error) or Assembler (and just return error)? +// - naming: Load vs Fill. +// - 'Must' variants. - // String should return a reasonably human-readable debug-friendly - // representation of a Link. It should only be used for debug and - // log message purposes; there is no contract that requires that the - // string be able to be parsed back into a reified Link. - String() string +// Can we get as far as a `QuickLoad(lnk Link) (Node, error)` function, which doesn't even ask you for a NodePrototype? +// No, not quite. (Alas.) If we tried to do so, and make it use `basicnode.Prototype`, we'd have import cycles; ded. + +func (lsys *LinkSystem) Load(lnkCtx LinkContext, lnk Link, np NodePrototype) (Node, error) { + nb := np.NewBuilder() + if err := lsys.Fill(lnkCtx, lnk, nb); err != nil { + return nil, err + } + return nb.Build(), nil } -// LinkBuilder encapsulates any implementation details and parameters -// necessary for taking a Node and converting it to a serial representation -// and returning a Link to that data. -// -// The serialized bytes will be routed through the provided Storer system, -// which is expected to store them in some way such that a related Loader -// system can later use the Link and an associated Loader to load nodes -// of identical content. -// -// LinkBuilder, like Link, is an abstract interface. -// If using CIDs as an implementation, LinkBuilder will encapsulate things -// like multihashType, multicodecType, and cidVersion, for example. -type LinkBuilder interface { - Build(context.Context, LinkContext, Node, Storer) (Link, error) +func (lsys *LinkSystem) MustLoad(lnkCtx LinkContext, lnk Link, np NodePrototype) Node { + if n, err := lsys.Load(lnkCtx, lnk, np); err != nil { + panic(err) + } else { + return n + } } -// Loader functions are used to get a reader for raw serialized content -// based on the lookup information in a Link. -// A loader function is used by providing it to a Link.Load() call.
-// -// Loaders typically have some filesystem or database handle contained -// within their closure which is used to satisfy read operations. -// -// LinkContext objects can be provided to give additional information -// to the loader, and will be automatically filled out when a Loader -// is used by systems in the traversal package; most Loader implementations -// should also work fine when given the zero value of LinkContext. -// -// Loaders are implicitly coupled to a Link implementation and have some -// "extra" knowledge of the concrete Link type. This necessary since there is -// no mandated standard for how to serially represent Link itself, and such -// a representation is typically needed by a Storer implementation. -type Loader func(lnk Link, lnkCtx LinkContext) (io.Reader, error) +func (lsys *LinkSystem) Fill(lnkCtx LinkContext, lnk Link, na NodeAssembler) error { + if lnkCtx.Ctx == nil { + lnkCtx.Ctx = context.Background() + } + // Choose all the parts. + decoder, err := lsys.DecoderChooser(lnk) + if err != nil { + return ErrLinkingSetup{"could not choose a decoder", err} + } + hasher, err := lsys.HasherChooser(lnk.Prototype()) + if err != nil { + return ErrLinkingSetup{"could not choose a hasher", err} + } + if lsys.StorageReadOpener == nil { + return ErrLinkingSetup{"no storage configured for reading", io.ErrClosedPipe} // REVIEW: better cause? + } + // Open storage, read it, verify it, and feed the codec to assemble the nodes. + // We have two paths through this: if a `Bytes() []byte` method is handy, we'll assume it's faster than going through reader. + // These diverge significantly, because if we give up on streaming, it makes sense to do the full hash check first before decoding at all. + reader, err := lsys.StorageReadOpener(lnkCtx, lnk) + if err != nil { + return err + } + if buf, ok := reader.(interface{ Bytes() []byte }); ok { + // Flush everything to the hasher in one big slice. + hasher.Write(buf.Bytes()) + hash := hasher.Sum(nil) + // Bit of a jig to get something we can do the hash equality check on. + lnk2 := lnk.Prototype().BuildLink(hash) + if lnk2 != lnk { + return ErrHashMismatch{Actual: lnk2, Expected: lnk} + } + // Perform decoding (knowing the hash is already verified). + // Note that the decoder receives the same reader as we started with, + // and as a result, is also free to detect a `Bytes() []byte` accessor and do any optimizations it wishes to based on that. + return decoder(na, reader) + } else { + // Tee the stream so that the hasher is fed as the unmarshal progresses through the stream. + // Note: the tee means *the decoder doesn't get to see the original reader type*. + // This is part of why the `Bytes() []byte` branch above is useful; the decoder loses any ability to do a similar check + // and optimization when the tee is in the middle. + tee := io.TeeReader(reader, hasher) + decodeErr := decoder(na, tee) + if decodeErr != nil { // It is important for security to check the hash before returning any other observation about the content. + // This copy is for data remaining in the block that wasn't already pulled through the TeeReader by the decoder. + _, err := io.Copy(hasher, reader) + if err != nil { + return err + } + } + hash := hasher.Sum(nil) + // Bit of a jig to get something we can do the hash equality check on.
+ lnk2 := lnk.Prototype().BuildLink(hash) + if lnk2 != lnk { + return ErrHashMismatch{Actual: lnk2, Expected: lnk} + } + if decodeErr != nil { + return decodeErr + } + return nil + } +} -// Storer functions are used to a get a writer for raw serialized content, -// which will be committed to storage indexed by Link. -// A stoerer function is used by providing it to a LinkBuilder.Build() call. -// -// The storer system comes in two parts: the Storer itself *starts* a storage -// operation (presumably to some e.g. tempfile) and returns a writer; the -// StoreCommitter returned with the writer is used to *commit* the final storage -// (much like a 'Close' operation for the writer). -// -// Storers typically have some filesystem or database handle contained -// within their closure which is used to satisfy read operations. -// -// LinkContext objects can be provided to give additional information -// to the storer, and will be automatically filled out when a Storer -// is used by systems in the traversal package; most Storer implementations -// should also work fine when given the zero value of LinkContext. -// -// Storers are implicitly coupled to a Link implementation and have some -// "extra" knowledge of the concrete Link type. This necessary since there is -// no mandated standard for how to serially represent Link itself, and such -// a representation is typically needed by a Storer implementation. -type Storer func(lnkCtx LinkContext) (io.Writer, StoreCommitter, error) +func (lsys *LinkSystem) MustFill(lnkCtx LinkContext, lnk Link, na NodeAssembler) { + if err := lsys.Fill(lnkCtx, lnk, na); err != nil { + panic(err) + } +} -// StoreCommitter is a thunk returned by a Storer which is used to "commit" -// the storage operation. It should be called after the associated writer -// is finished, similar to a 'Close' method, but further takes a Link parameter, -// which is the identity of the content. Typically, this will cause an atomic -// operation in the storage system to move the already-written content into -// a final place (e.g. rename a tempfile) determined by the Link. -// (The Link parameter is necessarily only given at the end of the process -// rather than the beginning to so that we can have content-addressible -// semantics while also supporting streaming writes.) -type StoreCommitter func(Link) error +func (lsys *LinkSystem) Store(lnkCtx LinkContext, lp LinkPrototype, n Node) (Link, error) { + if lnkCtx.Ctx == nil { + lnkCtx.Ctx = context.Background() + } + // Choose all the parts. + encoder, err := lsys.EncoderChooser(lp) + if err != nil { + return nil, ErrLinkingSetup{"could not choose an encoder", err} + } + hasher, err := lsys.HasherChooser(lp) + if err != nil { + return nil, ErrLinkingSetup{"could not choose a hasher", err} + } + if lsys.StorageWriteOpener == nil { + return nil, ErrLinkingSetup{"no storage configured for writing", io.ErrClosedPipe} // REVIEW: better cause? + } + // Open storage write stream, feed serial data to the storage and the hasher, and funnel the codec output into both. + writer, commitFn, err := lsys.StorageWriteOpener(lnkCtx) + if err != nil { + return nil, err + } + tee := io.MultiWriter(writer, hasher) + err = encoder(n, tee) + if err != nil { + return nil, err + } + lnk := lp.BuildLink(hasher.Sum(nil)) + return lnk, commitFn(lnk) +} -// LinkContext is a parameter to Storer and Loader functions. 
-// -// An example use of LinkContext might be inspecting the LinkNode, and if -// it's a typed node, inspecting its Type property; then, a Loader might -// deciding on whether or not we want to load objects of that Type. -// This might be used to do a traversal which looks at all directory objects, -// but not file contents, for example. -type LinkContext struct { - LinkPath Path - LinkNode Node // has the Link again, but also might have type info // always zero for writing new nodes, for obvi reasons. - ParentNode Node +func (lsys *LinkSystem) MustStore(lnkCtx LinkContext, lp LinkPrototype, n Node) Link { + if lnk, err := lsys.Store(lnkCtx, lp, n); err != nil { + panic(err) + } else { + return lnk + } } -// n.b. if I had java, this would all indeed be generic: -// `Link<$T>`, `LinkBuilder<$T>`, `Storer<$T>`, etc would be an explicit family. -// ... Then again, in java, that'd prevent composition of a Storer or Loader -// which could support more than one concrete type, so. ¯\_(ツ)_/¯ +// ComputeLink returns a Link for the given data, but doesn't do anything else +// (e.g. it doesn't try to store any of the serial-form data anywhere else). +func (lsys *LinkSystem) ComputeLink(lp LinkPrototype, n Node) (Link, error) { + encoder, err := lsys.EncoderChooser(lp) + if err != nil { + return nil, ErrLinkingSetup{"could not choose an encoder", err} + } + hasher, err := lsys.HasherChooser(lp) + if err != nil { + return nil, ErrLinkingSetup{"could not choose a hasher", err} + } + err = encoder(n, hasher) + if err != nil { + return nil, err + } + return lp.BuildLink(hasher.Sum(nil)), nil +} + +func (lsys *LinkSystem) MustComputeLink(lp LinkPrototype, n Node) Link { + if lnk, err := lsys.ComputeLink(lp, n); err != nil { + panic(err) + } else { + return lnk + } +} diff --git a/linking/cid/HACKME.md b/linking/cid/HACKME.md new file mode 100644 index 00000000..301885a4 --- /dev/null +++ b/linking/cid/HACKME.md @@ -0,0 +1,57 @@ +Why does this package exist? +---------------------------- + +The `linking/cid` package bends the `github.com/ipfs/go-cid` package into conforming to the `ipld.Link` interface. + +The `linking/cid` package also contains factory functions for `ipld.LinkSystem`. +These LinkSystems will be constructed with `EncoderChooser`, `DecoderChooser`, and `HasherChooser` funcs +which will use multicodec registries and multihash registries respectively. + +### Why not use go-cid directly? + +We need a "Link" interface in the root `ipld` package or things just aren't definable. +But we don't want the root `ipld.Link` concept to directly map to `go-cid.Cid` for several reasons: + +1. We might want to revisit the go-cid library. Possibly in the "significantly breaking changes" sense. + - It's also not clear when we might do this -- and if we do, the transition period will be *long* because it's a highly-depended-upon library. + - See below for some links to a gist that discusses why. +2. We might want to extend the concept of linking to more than just plain CIDs. + - This is hypothetical at present -- but an often-discussed example is "what if CID+Path was also a Link?" +3. We might sometimes want to use IPLD libraries without using any CID implementation at all. + - e.g. it's totally believable to want to use IPLD libraries for handling JSON and CBOR, even if you don't want IPLD linking. + - if the CID packages were cheap enough, maybe this concern would fade -- but right now, they're **definitely** not; the transitive dependency tree of go-cid is *huge*.
+ +#### If go-cid is revisited, what might that look like? + +No idea. (At least, not in a committal way.) + +https://gist.github.com/warpfork/e871b7fee83cb814fb1f043089983bb3#existing-implementations +gathers some reflections on the problems that would be nice to solve, though. + +https://gist.github.com/warpfork/e871b7fee83cb814fb1f043089983bb3#file-cid-go +contains a draft outline of what a revisited API could look like, +but note that at the time of writing, it is not strongly ratified nor in any way committed to. + +At any rate, though, the operative question for this package is: +if we do revisit go-cid, how are we going to make the transition managable? + +It seems unlikely we'd be able to make the transition manageable without some interface, somewhere. +So we might as well draw that line at `ipld.Link`. + +(I hypothesize that a transition story might involve two CID packages, +which could grow towards a shared interface, +doing so in a way that's purely additive in the established `go-cid` package. +We'd need two separate go modules to do this, since the aim is reducing dependency bloat for those that use the new one. +The shared interface in this story could have more info than `ipld.Link` does now, +but would nonetheless still certainly be an interface in order to support the separation of modules.) + +### Why are LinkSystem factory functions here, instead of in the main IPLD package? + +Same reason as why we don't use go-cid directly. + +If we put these LinkSystem defaults in the root `ipld` package, +we'd bring on all the transitive dependencies of `go-cid` onto an user of `ipld` unconditionally... +and we don't want to do that. + +You know that Weird Al song "It's all about the pentiums"? +Retune that in your mind to "It's all about dependencies". diff --git a/linking/cid/cidLink.go b/linking/cid/cidLink.go index c785fa5f..31f1e703 100644 --- a/linking/cid/cidLink.go +++ b/linking/cid/cidLink.go @@ -1,18 +1,16 @@ package cidlink import ( - "bytes" - "context" "fmt" - "io" cid "github.com/ipfs/go-cid" ipld "github.com/ipld/go-ipld-prime" + multihash "github.com/multiformats/go-multihash" ) var ( - _ ipld.Link = Link{} - _ ipld.LinkBuilder = LinkBuilder{} + _ ipld.Link = Link{} + _ ipld.LinkPrototype = LinkPrototype{} ) // Link implements the ipld.Link interface using a CID. @@ -26,92 +24,50 @@ type Link struct { cid.Cid } -// byteAccessor is a reader interface that can access underlying bytes -type byteAccesor interface { - Bytes() []byte -} - -func (lnk Link) Load(ctx context.Context, lnkCtx ipld.LinkContext, na ipld.NodeAssembler, loader ipld.Loader) error { - // Open the byte reader. - r, err := loader(lnk, lnkCtx) - if err != nil { - return err - } - // Tee into hash checking and unmarshalling. - mcDecoder, exists := multicodecDecodeTable[lnk.Prefix().Codec] - if !exists { - return fmt.Errorf("no decoder registered for multicodec %d", lnk.Prefix().Codec) - } - var hasherBytes []byte - var decodeErr error - byteBuf, ok := r.(byteAccesor) - if ok { - hasherBytes = byteBuf.Bytes() - decodeErr = mcDecoder(na, r) - } else { - var hasher bytes.Buffer // multihash only exports bulk use, which is... really inefficient and should be fixed. - decodeErr = mcDecoder(na, io.TeeReader(r, &hasher)) - // Error checking order here is tricky. - // If decoding errored out, we should still run the reader to the end, to check the hash. - // (We still don't implement this by running the hash to the end first, because that would increase the high-water memory requirement.) 
- // ((Which we experience right now anyway because multihash's interface is silly, but we're acting as if that's fixed or will be soon.)) - // If the hash is rejected, we should return that error (and even if there was a decodeErr, it becomes irrelevant). - if decodeErr != nil { - _, err := io.Copy(&hasher, r) - if err != nil { - return err - } - } - hasherBytes = hasher.Bytes() - } - - cid, err := lnk.Prefix().Sum(hasherBytes) - if err != nil { - return err - } - if cid != lnk.Cid { - return fmt.Errorf("hash mismatch! %q (actual) != %q (expected)", cid, lnk.Cid) - } - if decodeErr != nil { - return decodeErr - } - return nil -} -func (lnk Link) LinkBuilder() ipld.LinkBuilder { - return LinkBuilder{lnk.Cid.Prefix()} +func (lnk Link) Prototype() ipld.LinkPrototype { + return LinkPrototype{lnk.Cid.Prefix()} } func (lnk Link) String() string { return lnk.Cid.String() } -type LinkBuilder struct { +type LinkPrototype struct { cid.Prefix } -func (lb LinkBuilder) Build(ctx context.Context, lnkCtx ipld.LinkContext, node ipld.Node, storer ipld.Storer) (ipld.Link, error) { - // Open the byte writer. - w, commit, err := storer(lnkCtx) - if err != nil { - return nil, err +func (lp LinkPrototype) BuildLink(hashsum []byte) ipld.Link { + // Does this method body look surprisingly complex? I agree. + // We actually have to do all this work. The go-cid package doesn't expose a constructor that just lets us directly set the bytes and the prefix numbers next to each other. + // No, `cid.Prefix.Sum` is not the method you are looking for: that expects the whole data body. + // Most of the logic here is the same as the body of `cid.Prefix.Sum`; we just couldn't get at the relevant parts without copypasta. + // There is also some logic that's sort of folded in from the go-multihash module. This is really a mess. + // The go-cid package needs review. So does go-multihash. Their responsibilies are not well compartmentalized and they don't play well with other stdlib golang interfaces. + p := lp.Prefix + + length := p.MhLength + if p.MhType == multihash.ID { + length = -1 } - // Marshal, teeing into the storage writer and the hasher. - mcEncoder, exists := multicodecEncodeTable[lb.Prefix.Codec] - if !exists { - return nil, fmt.Errorf("no encoder registered for multicodec %d", lb.Prefix.Codec) + if p.Version == 0 && (p.MhType != multihash.SHA2_256 || + (p.MhLength != 32 && p.MhLength != -1)) { + panic(fmt.Errorf("invalid cid v0 prefix")) } - var hasher bytes.Buffer // multihash-via-cid only exports bulk use, which is... really inefficient and should be fixed. - w = io.MultiWriter(&hasher, w) - err = mcEncoder(node, w) - if err != nil { - return nil, err + + if length != -1 { + hashsum = hashsum[:p.MhLength] } - cid, err := lb.Prefix.Sum(hasher.Bytes()) + + mh, err := multihash.Encode(hashsum, p.MhType) if err != nil { - return nil, err + panic(err) // No longer possible, but multihash still returns an error for legacy reasons. 
} - lnk := Link{cid} - if err := commit(lnk); err != nil { - return lnk, err + + switch lp.Prefix.Version { + case 0: + return Link{cid.NewCidV0(mh)} + case 1: + return Link{cid.NewCidV1(p.Codec, mh)} + default: + panic(fmt.Errorf("invalid cid version")) } - return lnk, nil } diff --git a/linking/cid/linksystem.go b/linking/cid/linksystem.go new file mode 100644 index 00000000..2c5a996a --- /dev/null +++ b/linking/cid/linksystem.go @@ -0,0 +1,53 @@ +package cidlink + +import ( + "fmt" + "hash" + + "github.com/multiformats/go-multihash/core" + + "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime/multicodec" +) + +func DefaultLinkSystem() ipld.LinkSystem { + return ipld.LinkSystem{ + EncoderChooser: func(lp ipld.LinkPrototype) (ipld.Encoder, error) { + switch lp2 := lp.(type) { + case LinkPrototype: + fn, err := multicodec.LookupEncoder(lp2.GetCodec()) + if err != nil { + return nil, err + } + return fn, nil + default: + return nil, fmt.Errorf("this encoderChooser can only handle cidlink.LinkPrototype; got %T", lp) + } + }, + DecoderChooser: func(lnk ipld.Link) (ipld.Decoder, error) { + lp := lnk.Prototype() + switch lp2 := lp.(type) { + case LinkPrototype: + fn, err := multicodec.LookupDecoder(lp2.GetCodec()) + if err != nil { + return nil, err + } + return fn, nil + default: + return nil, fmt.Errorf("this decoderChooser can only handle cidlink.LinkPrototype; got %T", lp) + } + }, + HasherChooser: func(lp ipld.LinkPrototype) (hash.Hash, error) { + switch lp2 := lp.(type) { + case LinkPrototype: + h, err := multihash.GetHasher(lp2.MhType) + if err != nil { + return nil, fmt.Errorf("no hasher registered for multihash indicator 0x%x: %w", lp2.MhType, err) + } + return h, nil + default: + return nil, fmt.Errorf("this hasherChooser can only handle cidlink.LinkPrototype; got %T", lp) + } + }, + } +} diff --git a/linking/cid/multicodec.go b/linking/cid/multicodec.go deleted file mode 100644 index 510e0411..00000000 --- a/linking/cid/multicodec.go +++ /dev/null @@ -1,42 +0,0 @@ -package cidlink - -import ( - "io" - - ipld "github.com/ipld/go-ipld-prime" -) - -type MulticodecDecodeTable map[uint64]MulticodecDecoder - -type MulticodecEncodeTable map[uint64]MulticodecEncoder - -// MulticodecDecoder builds an ipld.Node by unmarshalling bytes and funnelling -// the data tree into an ipld.NodeAssembler. The resulting Node is not -// returned; typically you call this function with an ipld.NodeBuilder, -// and you can extract the result from there. -// -// MulticodecDecoder are used by registering them in a MulticodecDecoderTable, -// which makes them available to be used internally by cidlink.Link.Load. -// -// Consider implementing decoders to probe their NodeBuilder to see if it -// has special features that may be able to do the job more efficiently. -// For example, ipldcbor.NodeBuilder has special unmarshaller functions -// that know how to fastpath their work *if* we're doing a cbor decode; -// if possible, detect and use that; if not, fall back to general generic -// NodeBuilder usage. -type MulticodecDecoder func(ipld.NodeAssembler, io.Reader) error - -// MulticodecEncoder marshals and ipld.Node into bytes and sends them to -// an io.Writer. -// -// MulticodecEncoder are used by registering them in a MulticodecEncoderTable, -// which makes them available to be used internally by cidlink.LinkBuilder. -// -// Tends to be implemented by probing the node to see if it matches a special -// interface that we know can do this particular kind of encoding -// (e.g. 
if you're using ipldgit.Node and making a MulticodecEncoder to register
-// as the rawgit multicodec, you'll probe for that specific thing, since it's
-// implemented on the node itself),
-// but may also be able to work based on the ipld.Node interface alone
-// (e.g. you can do dag-cbor to any kind of Node).
-type MulticodecEncoder func(ipld.Node, io.Writer) error
diff --git a/linking/cid/multicodecRegistry.go b/linking/cid/multicodecRegistry.go
deleted file mode 100644
index f24f8dec..00000000
--- a/linking/cid/multicodecRegistry.go
+++ /dev/null
@@ -1,35 +0,0 @@
-package cidlink
-
-import "fmt"
-
-var (
- multicodecDecodeTable MulticodecDecodeTable
- multicodecEncodeTable MulticodecEncodeTable
-)
-
-func init() {
- multicodecEncodeTable = make(MulticodecEncodeTable)
- multicodecDecodeTable = make(MulticodecDecodeTable)
-}
-
-// RegisterMulticodecDecoder is used to register multicodec features.
-// It adjusts a global registry and may only be used at program init time;
-// it is meant to provide a plugin system, not a configuration mechanism.
-func RegisterMulticodecDecoder(hook uint64, fn MulticodecDecoder) {
- _, exists := multicodecDecodeTable[hook]
- if exists {
- panic(fmt.Errorf("multicodec decoder already registered for %x", hook))
- }
- multicodecDecodeTable[hook] = fn
-}
-
-// RegisterMulticodecEncoder is used to register multicodec features.
-// It adjusts a global registry and may only be used at program init time;
-// it is meant to provide a plugin system, not a configuration mechanism.
-func RegisterMulticodecEncoder(hook uint64, fn MulticodecEncoder) {
- _, exists := multicodecEncodeTable[hook]
- if exists {
- panic(fmt.Errorf("multicodec encoder already registered for %x", hook))
- }
- multicodecEncodeTable[hook] = fn
-}
diff --git a/linkingExamples_test.go b/linkingExamples_test.go
new file mode 100644
index 00000000..bdc873dc
--- /dev/null
+++ b/linkingExamples_test.go
@@ -0,0 +1,133 @@
+package ipld_test
+
+import (
+ "fmt"
+
+ "github.com/ipfs/go-cid"
+
+ "github.com/ipld/go-ipld-prime"
+ _ "github.com/ipld/go-ipld-prime/codec/dagcbor"
+ "github.com/ipld/go-ipld-prime/fluent"
+ cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+ basicnode "github.com/ipld/go-ipld-prime/node/basic"
+ "github.com/ipld/go-ipld-prime/storage"
+)
+
+// store is a simple in-memory storage where we'll keep serialized IPLD data.
+//
+// ExampleStoringLink will put data into this;
+// ExampleLoadingLink will read out from it.
+//
+// In a real program, you'll probably make functions to load and store from disk,
+// or some network storage, or... whatever you want, really :)
+var store = storage.Memory{}
+
+// TODO: These examples are really heavy on CIDs and the multicodec and multihash magic tables.
+// It would be good to have examples that create and use less magical LinkSystem constructions, too.
+
+func ExampleStoringLink() {
+ // Creating a Link is done by choosing a concrete link implementation (typically, CID),
+ // getting a LinkSystem that knows how to work with that, and then using the LinkSystem methods.
+
+ // Let's get a LinkSystem. We're going to be working with CID links,
+ // so let's get the default LinkSystem that's ready to work with those.
+ lsys := cidlink.DefaultLinkSystem()
+
+ // We want to store the serialized data somewhere.
+ // We'll use an in-memory store for this. (It's a package scoped variable.)
+ // You can use any kind of storage system here;
+ // you just need a function that conforms to the ipld.BlockWriteOpener interface.
+ lsys.StorageWriteOpener = (&store).OpenWrite
+
+ // To create any links, first we need a LinkPrototype.
+ // This gathers together any parameters that might be needed when making a link.
+ // (For CIDs, the version, the codec, and the multihash type are all parameters we'll need.)
+ // Often, you can probably make this a constant for your whole application.
+ lp := cidlink.LinkPrototype{cid.Prefix{
+ Version: 1, // Usually '1'.
+ Codec: 0x71, // 0x71 means "dag-cbor" -- See the multicodecs table: https://github.com/multiformats/multicodec/
+ MhType: 0x13, // 0x13 means "sha2-512" -- See the multicodecs table: https://github.com/multiformats/multicodec/
+ MhLength: 64, // sha2-512 hash has a 64-byte sum.
+ }}
+
+ // And we need some data to link to! Here's a quick piece of example data:
+ n := fluent.MustBuildMap(basicnode.Prototype.Map, 1, func(na fluent.MapAssembler) {
+ na.AssembleEntry("hello").AssignString("world")
+ })
+
+ // Before we use the LinkSystem, NOTE:
+ // There's a side-effecting import at the top of the file. It's for the dag-cbor codec.
+ // The CID LinkSystem defaults use a global registry called the multicodec table;
+ // and the multicodec table is populated in part by the dag-cbor package when it's first imported.
+ // You'll need that side-effecting import, too, to copy this example.
+ // It can happen anywhere in your program; once, in any package, is enough.
+ // If you don't have this import, the codec will not be registered in the multicodec registry,
+ // and when you use the LinkSystem we got from the cidlink package, it will return an error of type ErrLinkingSetup.
+ // If you initialize a custom LinkSystem, you can control this more directly;
+ // these registry systems are only here as defaults.
+
+ // Now: time to apply the LinkSystem, and do the actual store operation!
+ lnk, err := lsys.Store(
+ ipld.LinkContext{}, // The zero value is fine. Configure it if you want cancellability or other features.
+ lp, // The LinkPrototype says what codec and hashing to use.
+ n, // And here's our data.
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ // That's it! We got a link.
+ fmt.Printf("link: %s\n", lnk)
+ fmt.Printf("concrete type: `%T`\n", lnk)
+
+ // Remember: the serialized data was also stored to the 'store' variable as a side-effect.
+ // (We set this up back when we customized the LinkSystem.)
+ // We'll pick this data back up again in the example for loading.
+
+ // Output:
+ // link: bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk
+ // concrete type: `cidlink.Link`
+}
+
+func ExampleLoadingLink() {
+ // Let's say we want to load this link (it's the same one we just created in the example above).
+ cid, _ := cid.Decode("bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk")
+ lnk := cidlink.Link{cid}
+
+ // Let's get a LinkSystem. We're going to be working with CID links,
+ // so let's get the default LinkSystem that's ready to work with those.
+ // (This is the same as we did in ExampleStoringLink.)
+ lsys := cidlink.DefaultLinkSystem()
+
+ // We need somewhere to go looking for any of the data we might want to load!
+ // We'll use an in-memory store for this. (It's a package scoped variable.)
+ // (This particular memory store was filled with the data we'll load earlier, during ExampleStoringLink.)
+ // You can use any kind of storage system here;
+ // you just need a function that conforms to the ipld.BlockReadOpener interface.
+ lsys.StorageReadOpener = (&store).OpenRead
+
+ // We'll need to decide what in-memory implementation of ipld.Node we want to use.
+ // Here, we'll use the "basicnode" implementation. This is a good getting-started choice.
+ // But you could also use other implementations, or even a code-generated type with special features!
+ np := basicnode.Prototype.Any
+
+ // Before we use the LinkSystem, NOTE:
+ // There's a side-effecting import at the top of the file. It's for the dag-cbor codec.
+ // See the comments in ExampleStoringLink for more discussion of this and why it's important.
+
+ // Apply the LinkSystem, and ask it to load our link!
+ n, err := lsys.Load(
+ ipld.LinkContext{}, // The zero value is fine. Configure it if you want cancellability or other features.
+ lnk, // The Link we want to load!
+ np, // The NodePrototype says what kind of Node we want as a result.
+ )
+ if err != nil {
+ panic(err)
+ }
+
+ // Tada! We have the data as a node that we can traverse and use as desired.
+ fmt.Printf("we loaded a %s with %d entries\n", n.Kind(), n.Length())
+
+ // Output:
+ // we loaded a map with 1 entries
+}
diff --git a/linksystem.go b/linksystem.go
new file mode 100644
index 00000000..88942198
--- /dev/null
+++ b/linksystem.go
@@ -0,0 +1,219 @@
+package ipld
+
+import (
+ "fmt"
+ "hash"
+ "io"
+)
+
+// LinkSystem is a struct that composes all the individual functions
+// needed to load and store content addressed data using IPLD --
+// encoding functions, hashing functions, and storage connections --
+// and then offers the operations a user wants -- Store and Load -- as methods.
+//
+// Typically, the functions which are fields of LinkSystem are not used
+// directly by users (except to set them, when creating the LinkSystem),
+// and it's the higher level operations such as Store and Load that user code then calls.
+//
+// The most typical way to get a LinkSystem is from the linking/cid package,
+// which has a factory function called DefaultLinkSystem.
+// The LinkSystem returned by that function will be based on CIDs,
+// and use the multicodec registry and multihash registry to select encodings and hashing mechanisms.
+// The BlockWriteOpener and BlockReadOpener must still be provided by the user;
+// otherwise, only the ComputeLink method will work.
+//
+// Some implementations of BlockWriteOpener and BlockReadOpener may be
+// found in the storage package. Applications are also free to write their own.
+// Custom wrapping of BlockWriteOpener and BlockReadOpener is also common,
+// and may be reasonable if one wants to build application features that are block-aware.
+type LinkSystem struct {
+ EncoderChooser func(LinkPrototype) (Encoder, error)
+ DecoderChooser func(Link) (Decoder, error)
+ HasherChooser func(LinkPrototype) (hash.Hash, error)
+ StorageWriteOpener BlockWriteOpener
+ StorageReadOpener BlockReadOpener
+}
+
+// The following two types define the two directions of transform that a codec can be expected to perform:
+// from Node to serial stream, and from serial stream to Node (via a NodeAssembler).
+//
+// You'll find a couple of implementations matching this shape in subpackages of 'codec' in this module
+// (these are the handful of encoders and decoders we ship as "batteries included").
+// Other encoder and decoder implementations can be found in other repositories/modules.
+// It should also be easy to implement encoders and decoders of your own!
+//
+// Encoder and Decoder functions can be used on their own, but are also often used via the LinkSystem construction,
+// which handles all the other related operations necessary for a content-addressed storage system at once.
+//
+// Encoder and Decoder functions can be registered in the multicodec table in the `multicodec` package
+// if they're providing functionality that matches the expectations for a multicodec identifier.
+// This table will be used by some common EncoderChooser and DecoderChooser implementations
+// (namely, the ones in LinkSystems produced by the `linking/cid` package).
+type (
+ // Encoder defines the shape of a function which traverses a Node tree
+ // and emits its data in a serialized form into an io.Writer.
+ //
+ // The dual of Encoder is a Decoder, which takes a NodeAssembler
+ // and fills it with deserialized data consumed from an io.Reader.
+ // Typically, Decoder and Encoder functions will be found in pairs,
+ // and will be expected to be able to round-trip each other's data.
+ //
+ // Encoder functions can be used directly.
+ // Encoder functions are also often used via a LinkSystem when working with content-addressed storage.
+ // LinkSystem methods will helpfully handle the entire process of traversing a Node tree,
+ // encoding this data, hashing it, streaming it to the writer, and committing it -- all as one step.
+ //
+ // An Encoder works with Nodes.
+ // If you have a native golang structure, and want to serialize it using an Encoder,
+ // you'll need to figure out how to transform that golang structure into an ipld.Node tree first.
+ //
+ // It may be useful to understand "multicodecs" when working with Encoders.
+ // In IPLD, a system called "multicodecs" is typically used to describe encoding formats.
+ // A "multicodec indicator" is a number which describes an encoding;
+ // the Link implementations used in IPLD (CIDs) store a multicodec indicator in the Link;
+ // and in this library, a multicodec registry exists in the `multicodec` package,
+ // and can be used to associate a multicodec indicator number with an Encoder function.
+ // The default EncoderChooser in a LinkSystem will use this multicodec registry to select Encoder functions.
+ // However, you can construct a LinkSystem that uses any EncoderChooser you want.
+ // It is also possible to have and use Encoder functions that aren't registered as a multicodec at all...
+ // we just recommend being cautious of this, because it may make your data less recognizable
+ // when working with other systems that use multicodec indicators as part of their communication.
+ Encoder func(Node, io.Writer) error
+
+ // Decoder defines the shape of a function which produces a Node tree
+ // by reading serialized data from an io.Reader.
+ // (Decoder doesn't itself return a Node directly, but rather takes a NodeAssembler as an argument,
+ // because this allows the caller more control over the Node implementation,
+ // as well as some control over allocations.)
+ //
+ // The dual of Decoder is an Encoder, which takes a Node and
+ // emits its data in a serialized form into an io.Writer.
+ // Typically, Decoder and Encoder functions will be found in pairs,
+ // and will be expected to be able to round-trip each other's data.
+ //
+ // Decoder functions can be used directly.
+ // Decoder functions are also often used via a LinkSystem when working with content-addressed storage.
+ // LinkSystem methods will helpfully handle the entire process of opening block readers,
+ // verifying the hash of the data stream, and applying a Decoder to build Nodes -- all as one step.
+ //
+ // A Decoder works with Nodes.
+ // If you have a native golang structure, and want to populate it with data using a Decoder,
+ // you'll need to either get a NodeAssembler which proxies data into that structure directly,
+ // or assemble a Node as intermediate storage and copy the data to the native structure as a separate step.
+ //
+ // It may be useful to understand "multicodecs" when working with Decoders.
+ // See the documentation on the Encoder function interface for more discussion of multicodecs,
+ // the multicodec table, and how this is typically connected to linking.
+ Decoder func(NodeAssembler, io.Reader) error
+)
+
+// The following three types are the key functionality we need from a "blockstore".
+//
+// Some libraries might provide a "blockstore" object that has these as methods;
+// it may also have more methods (like enumeration features, GC features, etc),
+// but IPLD doesn't generally concern itself with those.
+// We just need these key things, so we can "put" and "get".
+//
+// The functions are a tad more complicated than "put" and "get" so that they have good mechanical sympathy.
+// In particular, the writing/"put" side is broken into two phases, so that the abstraction
+// makes it easy to begin to write data before the hash that will identify it is fully computed.
+type (
+ // BlockReadOpener defines the shape of a function used to
+ // open a reader for a block of data.
+ //
+ // In a content-addressed system, the Link parameter should be the only
+ // determiner of what block body is returned.
+ //
+ // The LinkContext may be zero, or may be used to carry extra information:
+ // it may be used to carry info which hints at different storage pools;
+ // it may be used to carry authentication data; etc.
+ // (Any such behaviors are something that a BlockReadOpener implementation
+ // will need to document at a higher detail level than this interface specifies.
+ // In this interface, we can only note that it is possible to pass such information opaquely
+ // via the LinkContext or by attachments to the general-purpose Context it contains.)
+ // The LinkContext should not have an effect on the block body returned, however;
+ // at most it should only affect data availability
+ // (e.g. whether any block body is returned, versus an error).
+ //
+ // Reads are cancellable by cancelling the LinkContext.Context.
+ //
+ // Other parts of the IPLD library suite (such as the traversal package, and all its functions)
+ // will typically take a Context as a parameter or piece of config from the caller,
+ // and will pass that down through the LinkContext, meaning this can be used to
+ // carry information as well as cancellation control all the way through the system.
+ //
+ // BlockReadOpener is typically not used directly, but is instead
+ // composed in a LinkSystem and used via the methods of LinkSystem.
+ // LinkSystem methods will helpfully handle the entire process of opening block readers,
+ // verifying the hash of the data stream, and applying a Decoder to build Nodes -- all as one step.
+ //
+ // BlockReadOpener implementations are not required to validate that
+ // the contents which will be streamed out of the reader actually match
+ // the hash in the Link parameter before returning.
+ // (This is something that the LinkSystem composition will handle if you're using it.)
+ //
+ // Some implementations of BlockWriteOpener and BlockReadOpener may be
+ // found in the storage package. Applications are also free to write their own.
+ BlockReadOpener func(LinkContext, Link) (io.Reader, error)
+
+ // BlockWriteOpener defines the shape of a function used to open a writer
+ // into which data can be streamed, and which will eventually be "committed".
+ // Committing is done using the BlockWriteCommitter returned by the BlockWriteOpener,
+ // which finishes the write and requires stating the Link that should identify this data for future reading.
+ //
+ // The LinkContext may be zero, or may be used to carry extra information:
+ // it may be used to carry info which hints at different storage pools;
+ // it may be used to carry authentication data; etc.
+ //
+ // Writes are cancellable by cancelling the LinkContext.Context.
+ //
+ // Other parts of the IPLD library suite (such as the traversal package, and all its functions)
+ // will typically take a Context as a parameter or piece of config from the caller,
+ // and will pass that down through the LinkContext, meaning this can be used to
+ // carry information as well as cancellation control all the way through the system.
+ //
+ // BlockWriteOpener is typically not used directly, but is instead
+ // composed in a LinkSystem and used via the methods of LinkSystem.
+ // LinkSystem methods will helpfully handle the entire process of traversing a Node tree,
+ // encoding this data, hashing it, streaming it to the writer, and committing it -- all as one step.
+ //
+ // BlockWriteOpener implementations are expected to start writing their content immediately,
+ // and later, the returned BlockWriteCommitter should also be able to expect that
+ // the Link which it is given is a reasonable hash of the content.
+ // (To give an example of how this might be efficiently implemented:
+ // One might imagine that if implementing a disk storage mechanism,
+ // the io.Writer returned from a BlockWriteOpener will be writing a new tempfile,
+ // and when the BlockWriteCommitter is called, it will flush the writes
+ // and then use a rename operation to place the tempfile in a permanent path based on the Link.)
+ //
+ // Some implementations of BlockWriteOpener and BlockReadOpener may be
+ // found in the storage package. Applications are also free to write their own.
+ BlockWriteOpener func(LinkContext) (io.Writer, BlockWriteCommitter, error)
+
+ // BlockWriteCommitter defines the shape of a function which, together
+ // with BlockWriteOpener, handles the writing and "committing" of a write
+ // to a content-addressable storage system.
+ //
+ // BlockWriteCommitter is a function which will be called at the end of a write process.
+ // It should flush any buffers and close the io.Writer which was
+ // made available earlier from the BlockWriteOpener call that also returned this BlockWriteCommitter.
+ //
+ // BlockWriteCommitter takes a Link parameter.
+ // This Link is expected to be a reasonable hash of the content,
+ // so that the BlockWriteCommitter can use this to commit the data to storage
+ // in a content-addressable fashion.
+ // See the documentation of BlockWriteOpener for more description of this
+ // and an example of how this is likely to be reduced to practice.
+ BlockWriteCommitter func(Link) error
+)
+
+// ErrLinkingSetup is returned by methods on LinkSystem when some part of the system is not set up correctly,
+// or when one of the components refuses to handle a Link or LinkPrototype given.
+// (It is not yielded for errors from the storage or codec systems once they've started; those errors rise without interference.)
+type ErrLinkingSetup struct {
+ Detail string // Perhaps an enum here as well, which states which internal function was to blame?
+ Cause error
+}
+
+func (e ErrLinkingSetup) Error() string { return fmt.Sprintf("%s: %v", e.Detail, e.Cause) }
+func (e ErrLinkingSetup) Unwrap() error { return e.Cause }
diff --git a/multicodec/multicodec.go b/multicodec/multicodec.go
new file mode 100644
index 00000000..2898834d
--- /dev/null
+++ b/multicodec/multicodec.go
@@ -0,0 +1,110 @@
+package multicodec
+
+import (
+ "fmt"
+
+ "github.com/ipld/go-ipld-prime"
+)
+
+var encoderRegistry = make(map[uint64]ipld.Encoder)
+var decoderRegistry = make(map[uint64]ipld.Decoder)
+
+// RegisterEncoder updates a simple map of multicodec indicator number to ipld.Encoder function.
+// The encoder functions registered can be subsequently looked up using LookupEncoder.
+//
+// Packages which implement an IPLD codec and have a multicodec number reserved in
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+// are encouraged to register themselves in this map at package init time.
+// (Doing this at package init time ensures this map can be accessed without race conditions.)
+//
+// This registry map is only used for default behaviors.
+// For example, linking/cid.DefaultLinkSystem will use LookupEncoder to access this registry map
+// and select encoders to use when serializing data for linking and storage.
+// LinkSystem itself is not hardcoded to use the global LookupEncoder feature;
+// therefore, if you don't want to rely on this mapping, you can always construct your own LinkSystem.
+//
+// No effort is made to detect conflicting registrations in this map.
+// If your dependency tree is such that this becomes a problem,
+// there are two ways to address this:
+// If RegisterEncoder is called with the same indicator code more than once, the last call wins.
+// In practice, this means that if an application has a strong opinion about which implementation to use for a certain codec,
+// it can make a Register call with that effect at init time in the application's main package.
+// This should have the desired effect because the root of the import tree has its init time effect last.
+// Alternatively, one can just avoid use of this registry entirely:
+// do this by making a LinkSystem that uses a custom EncoderChooser function.
+func RegisterEncoder(indicator uint64, encodeFunc ipld.Encoder) {
+ // This function could arguably be just a bare map access.
+ // We introduced a function primarily for the interest of potential future changes.
+ // E.g. one could introduce logging here to help detect unintended conflicting registrations.
+ // (We probably won't do this, but you can do it yourself as a printf debug hack. :))
+
+ if encodeFunc == nil {
+ panic("not sensible to attempt to register a nil function")
+ }
+ encoderRegistry[indicator] = encodeFunc
+}
+
+// LookupEncoder yields an ipld.Encoder function matching a multicodec indicator code number.
+//
+// Multicodec indicator numbers are specified in
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+//
+// To be available from this lookup function, an encoder must have been registered
+// for this indicator number by an earlier call to the RegisterEncoder function.
+func LookupEncoder(indicator uint64) (ipld.Encoder, error) {
+ encodeFunc, exists := encoderRegistry[indicator]
+ if !exists {
+ return nil, fmt.Errorf("no encoder registered for multicodec code %d (0x%x)", indicator, indicator)
+ }
+ return encodeFunc, nil
+}
+
+// RegisterDecoder updates a simple map of multicodec indicator number to ipld.Decoder function.
+// The decoder functions registered can be subsequently looked up using LookupDecoder.
+//
+// Packages which implement an IPLD codec and have a multicodec number reserved in
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+// are encouraged to register themselves in this map at package init time.
+// (Doing this at package init time ensures this map can be accessed without race conditions.)
+//
+// This registry map is only used for default behaviors.
+// For example, linking/cid.DefaultLinkSystem will use LookupDecoder to access this registry map
+// and select decoders to use when loading and deserializing data.
+// LinkSystem itself is not hardcoded to use the global LookupDecoder feature;
+// therefore, if you don't want to rely on this mapping, you can always construct your own LinkSystem.
+//
+// No effort is made to detect conflicting registrations in this map.
+// If your dependency tree is such that this becomes a problem,
+// there are two ways to address this:
+// If RegisterDecoder is called with the same indicator code more than once, the last call wins.
+// In practice, this means that if an application has a strong opinion about which implementation to use for a certain codec,
+// it can make a Register call with that effect at init time in the application's main package.
+// This should have the desired effect because the root of the import tree has its init time effect last.
+// Alternatively, one can just avoid use of this registry entirely:
+// do this by making a LinkSystem that uses a custom DecoderChooser function.
+func RegisterDecoder(indicator uint64, decodeFunc ipld.Decoder) {
+ // This function could arguably be just a bare map access.
+ // We introduced a function primarily for the interest of potential future changes.
+ // E.g. one could introduce logging here to help detect unintended conflicting registrations.
+ // (We probably won't do this, but you can do it yourself as a printf debug hack. :))
+
+ if decodeFunc == nil {
+ panic("not sensible to attempt to register a nil function")
+ }
+ decoderRegistry[indicator] = decodeFunc
+}
+
+// LookupDecoder yields an ipld.Decoder function matching a multicodec indicator code number.
+//
+// Multicodec indicator numbers are specified in
+// https://github.com/multiformats/multicodec/blob/master/table.csv
+//
+// To be available from this lookup function, a decoder must have been registered
+// for this indicator number by an earlier call to the RegisterDecoder function.
+func LookupDecoder(indicator uint64) (ipld.Decoder, error) { + decodeFunc, exists := decoderRegistry[indicator] + if !exists { + return nil, fmt.Errorf("no decoder registered for multicodec code %d (0x%x)", indicator, indicator) + } + return decodeFunc, nil +} diff --git a/schema/dmt/parse_test.go b/schema/dmt/parse_test.go index 7b002c72..1e86ec5f 100644 --- a/schema/dmt/parse_test.go +++ b/schema/dmt/parse_test.go @@ -35,7 +35,7 @@ import ( // func TestSchemaSchemaParse(t *testing.T) { nb := schemadmt.Type.Schema__Repr.NewBuilder() - if err := dagjson.Decoder(nb, strings.NewReader(` + if err := dagjson.Decode(nb, strings.NewReader(` { "types": { "TypeName": { diff --git a/storage/doc.go b/storage/doc.go new file mode 100644 index 00000000..eaaebbb2 --- /dev/null +++ b/storage/doc.go @@ -0,0 +1,9 @@ +// Storage contains some simple implementations for the +// ipld.BlockReadOpener and ipld.BlockWriteOpener interfaces, +// which are typically used by composition in a LinkSystem. +// +// These are provided as simple "batteries included" storage systems. +// They are aimed at being quickly usable to build simple demonstrations. +// For heavy usage (large datasets, with caching, etc) you'll probably +// want to start looking for other libraries which go deeper on this subject. +package storage diff --git a/storage/memory.go b/storage/memory.go new file mode 100644 index 00000000..52ba4b8d --- /dev/null +++ b/storage/memory.go @@ -0,0 +1,52 @@ +package storage + +import ( + "bytes" + "fmt" + "io" + + "github.com/ipld/go-ipld-prime" +) + +// Memory is a simple in-memory storage for data indexed by ipld.Link. +// (It's little more than a map -- in fact, the map is exported, +// and you can poke it directly.) +// +// The OpenRead method conforms to ipld.BlockReadOpener, +// and the OpenWrite method conforms to ipld.BlockWriteOpener. +// Therefore it's easy to use in a LinkSystem like this: +// +// store := storage.Memory{} +// lsys.StorageReadOpener = (&store).OpenRead +// lsys.StorageWriteOpener = (&store).OpenWrite +// +// This storage is mostly expected to be used for testing and demos, +// and as an example of how you can implement and integrate your own storage systems. 
+type Memory struct { + Bag map[ipld.Link][]byte +} + +func (store *Memory) beInitialized() { + if store.Bag != nil { + return + } + store.Bag = make(map[ipld.Link][]byte) +} + +func (store *Memory) OpenRead(lnkCtx ipld.LinkContext, lnk ipld.Link) (io.Reader, error) { + store.beInitialized() + data, exists := store.Bag[lnk] + if !exists { + return nil, fmt.Errorf("404") // FIXME this needs a standard error type + } + return bytes.NewReader(data), nil +} + +func (store *Memory) OpenWrite(lnkCtx ipld.LinkContext) (io.Writer, ipld.BlockWriteCommitter, error) { + store.beInitialized() + buf := bytes.Buffer{} + return &buf, func(lnk ipld.Link) error { + store.Bag[lnk] = buf.Bytes() + return nil + }, nil +} diff --git a/traversal/common.go b/traversal/common.go index 5b547302..d3e2282d 100644 --- a/traversal/common.go +++ b/traversal/common.go @@ -3,7 +3,6 @@ package traversal import ( "context" "fmt" - "io" ipld "github.com/ipld/go-ipld-prime" "github.com/ipld/go-ipld-prime/schema" @@ -19,11 +18,6 @@ func (tc *Config) init() { if tc.Ctx == nil { tc.Ctx = context.Background() } - if tc.LinkLoader == nil { - tc.LinkLoader = func(ipld.Link, ipld.LinkContext) (io.Reader, error) { - return nil, fmt.Errorf("no link loader configured") - } - } if tc.LinkTargetNodePrototypeChooser == nil { tc.LinkTargetNodePrototypeChooser = func(lnk ipld.Link, lnkCtx ipld.LinkContext) (ipld.NodePrototype, error) { if tlnkNd, ok := lnkCtx.LinkNode.(schema.TypedLinkNode); ok { @@ -32,11 +26,6 @@ func (tc *Config) init() { return nil, fmt.Errorf("no LinkTargetNodePrototypeChooser configured") } } - if tc.LinkStorer == nil { - tc.LinkStorer = func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - return nil, nil, fmt.Errorf("no link storer configured") - } - } } func (prog *Progress) init() { diff --git a/traversal/fns.go b/traversal/fns.go index d0902dfb..d359c673 100644 --- a/traversal/fns.go +++ b/traversal/fns.go @@ -39,9 +39,8 @@ type Progress struct { type Config struct { Ctx context.Context // Context carried through a traversal. Optional; use it if you need cancellation. - LinkLoader ipld.Loader // Loader used for automatic link traversal. + LinkSystem ipld.LinkSystem // LinkSystem used for automatic link loading, and also any storing if mutation features (e.g. traversal.Transform) are used. LinkTargetNodePrototypeChooser LinkTargetNodePrototypeChooser // Chooser for Node implementations to produce during automatic link traversal. - LinkStorer ipld.Storer // Storer used if any mutation features (e.g. traversal.Transform) are used. } // LinkTargetNodePrototypeChooser is a function that returns a NodePrototype based on diff --git a/traversal/focus.go b/traversal/focus.go index 741fd366..cc54fa3e 100644 --- a/traversal/focus.go +++ b/traversal/focus.go @@ -112,8 +112,8 @@ func (prog *Progress) get(n ipld.Node, p ipld.Path, trackProgress bool) (ipld.No // Dereference any links. for n.Kind() == ipld.Kind_Link { lnk, _ := n.AsLink() - // Assemble the LinkContext in case the Loader or NBChooser want it. lnkCtx := ipld.LinkContext{ + Ctx: prog.Cfg.Ctx, LinkPath: p.Truncate(i), LinkNode: n, ParentNode: prev, @@ -123,14 +123,9 @@ func (prog *Progress) get(n ipld.Node, p ipld.Path, trackProgress bool) (ipld.No if err != nil { return nil, fmt.Errorf("error traversing node at %q: could not load link %q: %s", p.Truncate(i+1), lnk, err) } - nb := np.NewBuilder() // Load link! 
- err = lnk.Load( - prog.Cfg.Ctx, - lnkCtx, - nb, - prog.Cfg.LinkLoader, - ) + prev = n + n, err = prog.Cfg.LinkSystem.Load(lnkCtx, lnk, np) if err != nil { return nil, fmt.Errorf("error traversing node at %q: could not load link %q: %s", p.Truncate(i+1), lnk, err) } @@ -138,7 +133,6 @@ func (prog *Progress) get(n ipld.Node, p ipld.Path, trackProgress bool) (ipld.No prog.LastBlock.Path = p.Truncate(i + 1) prog.LastBlock.Link = lnk } - prev, n = n, nb.Build() } } if trackProgress { @@ -325,6 +319,7 @@ func (prog Progress) focusedTransform(n ipld.Node, na ipld.NodeAssembler, p ipld return la.Finish() case ipld.Kind_Link: lnkCtx := ipld.LinkContext{ + Ctx: prog.Cfg.Ctx, LinkPath: prog.Path, LinkNode: n, ParentNode: nil, // TODO inconvenient that we don't have this. maybe this whole case should be a helper function. @@ -335,14 +330,11 @@ func (prog Progress) focusedTransform(n ipld.Node, na ipld.NodeAssembler, p ipld if err != nil { return fmt.Errorf("transform: error traversing node at %q: could not load link %q: %s", prog.Path, lnk, err) } - nb := np.NewBuilder() // Load link! - err = lnk.Load( - prog.Cfg.Ctx, - lnkCtx, - nb, - prog.Cfg.LinkLoader, - ) + // We'll use LinkSystem.Fill here rather than Load, + // because there's a nice opportunity to reuse the builder shortly. + nb := np.NewBuilder() + err = prog.Cfg.LinkSystem.Fill(lnkCtx, lnk, nb) if err != nil { return fmt.Errorf("transform: error traversing node at %q: could not load link %q: %s", prog.Path, lnk, err) } @@ -359,7 +351,7 @@ func (prog Progress) focusedTransform(n ipld.Node, na ipld.NodeAssembler, p ipld return err } n = nb.Build() - lnk, err = lnk.LinkBuilder().Build(prog.Cfg.Ctx, lnkCtx, n, prog.Cfg.LinkStorer) + lnk, err = prog.Cfg.LinkSystem.Store(lnkCtx, lnk.Prototype(), n) if err != nil { return fmt.Errorf("transform: error storing transformed node at %q: %s", prog.Path, err) } diff --git a/traversal/focus_test.go b/traversal/focus_test.go index 062f87b6..d0be50cd 100644 --- a/traversal/focus_test.go +++ b/traversal/focus_test.go @@ -1,31 +1,26 @@ package traversal_test import ( - "bytes" - "context" "fmt" - "io" - "strings" "testing" - "unicode" . "github.com/warpfork/go-wish" - cid "github.com/ipfs/go-cid" - ipld "github.com/ipld/go-ipld-prime" - "github.com/ipld/go-ipld-prime/must" - + "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime" _ "github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/fluent" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/ipld/go-ipld-prime/must" basicnode "github.com/ipld/go-ipld-prime/node/basic" + "github.com/ipld/go-ipld-prime/storage" "github.com/ipld/go-ipld-prime/traversal" ) // Do some fixture fabrication. // We assume all the builders and serialization must Just Work here. -var storage = make(map[ipld.Link][]byte) +var store = storage.Memory{} var ( leafAlpha, leafAlphaLnk = encode(basicnode.NewString("alpha")) leafBeta, leafBetaLnk = encode(basicnode.NewString("beta")) @@ -55,47 +50,22 @@ var ( // just gimme a link and stuff the bytes in a map. // (also return the node again for convenient assignment.) 
func encode(n ipld.Node) (ipld.Node, ipld.Link) { - lb := cidlink.LinkBuilder{cid.Prefix{ + lp := cidlink.LinkPrototype{cid.Prefix{ Version: 1, Codec: 0x0129, - MhType: 0x17, + MhType: 0x13, MhLength: 4, }} - lnk, err := lb.Build(context.Background(), ipld.LinkContext{}, n, - func(ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - buf := bytes.Buffer{} - return &buf, func(lnk ipld.Link) error { - storage[lnk] = buf.Bytes() - return nil - }, nil - }, - ) + lsys := cidlink.DefaultLinkSystem() + lsys.StorageWriteOpener = (&store).OpenWrite + + lnk, err := lsys.Store(ipld.LinkContext{}, lp, n) if err != nil { panic(err) } return n, lnk } -// Print a quick little table of our fixtures for sanity check purposes. -func init() { - withoutWhitespace := func(s string) string { - return strings.Map(func(r rune) rune { - if !unicode.IsPrint(r) { - return -1 - } else { - return r - } - }, s) - } - fmt.Printf("fixtures:\n"+strings.Repeat("\t%v\t%v\n", 5), - leafAlphaLnk, withoutWhitespace(string(storage[leafAlphaLnk])), - leafBetaLnk, withoutWhitespace(string(storage[leafBetaLnk])), - middleMapNodeLnk, withoutWhitespace(string(storage[middleMapNodeLnk])), - middleListNodeLnk, withoutWhitespace(string(storage[middleListNodeLnk])), - rootNodeLnk, withoutWhitespace(string(storage[rootNodeLnk])), - ) -} - // covers Focus used on one already-loaded Node; no link-loading exercised. func TestFocusSingleTree(t *testing.T) { t.Run("empty path on scalar node returns start node", func(t *testing.T) { @@ -162,11 +132,11 @@ func TestFocusWithLinkLoading(t *testing.T) { }) }) t.Run("link traversal with loader should work", func(t *testing.T) { + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead err := traversal.Progress{ Cfg: &traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype__Any{}, nil }, @@ -194,11 +164,11 @@ func TestGetWithLinkLoading(t *testing.T) { }) }) t.Run("link traversal with loader should work", func(t *testing.T) { + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead n, err := traversal.Progress{ Cfg: &traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype__Any{}, nil }, @@ -337,21 +307,15 @@ func TestFocusedTransform(t *testing.T) { } func TestFocusedTransformWithLinks(t *testing.T) { - var storage2 = make(map[ipld.Link][]byte) + var store2 = storage.Memory{} + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead + lsys.StorageWriteOpener = (&store2).OpenWrite cfg := traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype.Any, nil }, - LinkStorer: func(lnkCtx ipld.LinkContext) (io.Writer, ipld.StoreCommitter, error) { - wr := bytes.Buffer{} - return &wr, func(link ipld.Link) error { - storage2[link] = wr.Bytes() - return nil - }, nil - }, } t.Run("UpdateMapBeyondLink", func(t *testing.T) { n, err := 
traversal.Progress{ @@ -360,7 +324,7 @@ func TestFocusedTransformWithLinks(t *testing.T) { Wish(t, progress.Path.String(), ShouldEqual, "linkedMap/nested/nonlink") Wish(t, must.String(prev), ShouldEqual, "zoo") Wish(t, progress.LastBlock.Path.String(), ShouldEqual, "linkedMap") - Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqefye7xlxqda") + Wish(t, progress.LastBlock.Link.String(), ShouldEqual, "baguqeeyevmbz3ga") nb := prev.Prototype().NewBuilder() nb.AssignString("new string!") return nb.Build(), nil @@ -368,9 +332,9 @@ func TestFocusedTransformWithLinks(t *testing.T) { Wish(t, err, ShouldEqual, nil) Wish(t, n.Kind(), ShouldEqual, ipld.Kind_Map) // there should be a new object in our new storage! - Wish(t, len(storage2), ShouldEqual, 1) + Wish(t, len(store2.Bag), ShouldEqual, 1) // cleanup for next test - storage2 = make(map[ipld.Link][]byte) + store2 = storage.Memory{} }) t.Run("UpdateNotBeyondLink", func(t *testing.T) { // This is replacing a link with a non-link. Doing so shouldn't hit storage. @@ -385,9 +349,9 @@ func TestFocusedTransformWithLinks(t *testing.T) { Wish(t, err, ShouldEqual, nil) Wish(t, n.Kind(), ShouldEqual, ipld.Kind_Map) // there should be no new objects in our new storage! - Wish(t, len(storage2), ShouldEqual, 0) + Wish(t, len(store2.Bag), ShouldEqual, 0) // cleanup for next test - storage2 = make(map[ipld.Link][]byte) + store2 = storage.Memory{} }) // link traverse to scalar // this is unspecifiable using the current path syntax! you'll just end up replacing the link with the scalar! diff --git a/traversal/selector/exploreRecursive_test.go b/traversal/selector/exploreRecursive_test.go index d1ec78b1..e6e20705 100644 --- a/traversal/selector/exploreRecursive_test.go +++ b/traversal/selector/exploreRecursive_test.go @@ -199,7 +199,7 @@ func TestExploreRecursiveExplore(t *testing.T) { } ` nb := basicnode.Prototype__Any{}.NewBuilder() - err := dagjson.Decoder(nb, strings.NewReader(nodeString)) + err := dagjson.Decode(nb, strings.NewReader(nodeString)) Wish(t, err, ShouldEqual, nil) rn := nb.Build() rs = rs.Explore(rn, ipld.PathSegmentOfString("Parents")) @@ -250,7 +250,7 @@ func TestExploreRecursiveExplore(t *testing.T) { } ` nb := basicnode.Prototype__Any{}.NewBuilder() - err := dagjson.Decoder(nb, strings.NewReader(nodeString)) + err := dagjson.Decode(nb, strings.NewReader(nodeString)) Wish(t, err, ShouldEqual, nil) rn := nb.Build() rs = rs.Explore(rn, ipld.PathSegmentOfString("Parents")) @@ -293,7 +293,7 @@ func TestExploreRecursiveExplore(t *testing.T) { } ` nb := basicnode.Prototype__Any{}.NewBuilder() - err := dagjson.Decoder(nb, strings.NewReader(nodeString)) + err := dagjson.Decode(nb, strings.NewReader(nodeString)) Wish(t, err, ShouldEqual, nil) rn := nb.Build() rs = rs.Explore(rn, ipld.PathSegmentOfString("Parents")) @@ -338,7 +338,7 @@ func TestExploreRecursiveExplore(t *testing.T) { } ` nb := basicnode.Prototype__Any{}.NewBuilder() - err := dagjson.Decoder(nb, strings.NewReader(nodeString)) + err := dagjson.Decode(nb, strings.NewReader(nodeString)) Wish(t, err, ShouldEqual, nil) n := nb.Build() @@ -423,7 +423,7 @@ func TestExploreRecursiveExplore(t *testing.T) { } ` nb := basicnode.Prototype__Any{}.NewBuilder() - err := dagjson.Decoder(nb, strings.NewReader(nodeString)) + err := dagjson.Decode(nb, strings.NewReader(nodeString)) Wish(t, err, ShouldEqual, nil) rn := nb.Build() rs = rs.Explore(rn, ipld.PathSegmentOfString("Parents")) diff --git a/traversal/walk.go b/traversal/walk.go index cd07a7b4..30253984 100644 --- a/traversal/walk.go +++ 
b/traversal/walk.go @@ -177,8 +177,8 @@ func (prog Progress) loadLink(v ipld.Node, parent ipld.Node) (ipld.Node, error) if err != nil { return nil, err } - // Assemble the LinkContext in case the Loader or NBChooser want it. lnkCtx := ipld.LinkContext{ + Ctx: prog.Cfg.Ctx, LinkPath: prog.Path, LinkNode: v, ParentNode: parent, @@ -188,21 +188,15 @@ func (prog Progress) loadLink(v ipld.Node, parent ipld.Node) (ipld.Node, error) if err != nil { return nil, fmt.Errorf("error traversing node at %q: could not load link %q: %s", prog.Path, lnk, err) } - nb := np.NewBuilder() // Load link! - err = lnk.Load( - prog.Cfg.Ctx, - lnkCtx, - nb, - prog.Cfg.LinkLoader, - ) + n, err := prog.Cfg.LinkSystem.Load(lnkCtx, lnk, np) if err != nil { if _, ok := err.(SkipMe); ok { return nil, err } return nil, fmt.Errorf("error traversing node at %q: could not load link %q: %s", prog.Path, lnk, err) } - return nb.Build(), nil + return n, nil } // WalkTransforming walks a graph of Nodes, deciding which to alter by applying a Selector, diff --git a/traversal/walk_test.go b/traversal/walk_test.go index d6fa8fbc..a004582d 100644 --- a/traversal/walk_test.go +++ b/traversal/walk_test.go @@ -1,15 +1,14 @@ package traversal_test import ( - "bytes" - "io" "testing" . "github.com/warpfork/go-wish" - ipld "github.com/ipld/go-ipld-prime" + "github.com/ipld/go-ipld-prime" _ "github.com/ipld/go-ipld-prime/codec/dagjson" "github.com/ipld/go-ipld-prime/fluent" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" basicnode "github.com/ipld/go-ipld-prime/node/basic" "github.com/ipld/go-ipld-prime/traversal" "github.com/ipld/go-ipld-prime/traversal/selector" @@ -119,11 +118,11 @@ func TestWalkMatching(t *testing.T) { )) s, err := ss.Selector() var order int + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead err = traversal.Progress{ Cfg: &traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype__Any{}, nil }, @@ -165,11 +164,11 @@ func TestWalkMatching(t *testing.T) { ss := ssb.ExploreRange(0, 3, ssb.Matcher()) s, err := ss.Selector() var order int + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead err = traversal.Progress{ Cfg: &traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype__Any{}, nil }, @@ -210,11 +209,11 @@ func TestWalkMatching(t *testing.T) { }) s, err := ss.Selector() var order int + lsys := cidlink.DefaultLinkSystem() + lsys.StorageReadOpener = (&store).OpenRead err = traversal.Progress{ Cfg: &traversal.Config{ - LinkLoader: func(lnk ipld.Link, _ ipld.LinkContext) (io.Reader, error) { - return bytes.NewReader(storage[lnk]), nil - }, + LinkSystem: lsys, LinkTargetNodePrototypeChooser: func(_ ipld.Link, _ ipld.LinkContext) (ipld.NodePrototype, error) { return basicnode.Prototype__Any{}, nil },
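The examples in linkingExamples_test.go above cover Store and Load, but not the new LinkSystem.ComputeLink method, which hashes a node's encoded form without writing it anywhere. Below is a minimal sketch (not part of the patch above) of how ComputeLink could be used, assuming the same dag-cbor side-effecting import and the same CID prefix parameters as ExampleStoringLink:

```go
package main

import (
	"fmt"

	"github.com/ipfs/go-cid"

	_ "github.com/ipld/go-ipld-prime/codec/dagcbor" // side-effecting import: registers dag-cbor in the multicodec registry
	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
	basicnode "github.com/ipld/go-ipld-prime/node/basic"
)

func main() {
	// The default CID LinkSystem: its EncoderChooser and HasherChooser consult
	// the multicodec and multihash registries. No StorageWriteOpener is needed
	// when we only want to compute a link.
	lsys := cidlink.DefaultLinkSystem()

	// Same prefix parameters as the storing example: CIDv1, dag-cbor, sha2-512.
	lp := cidlink.LinkPrototype{cid.Prefix{
		Version:  1,
		Codec:    0x71, // dag-cbor
		MhType:   0x13, // sha2-512
		MhLength: 64,
	}}

	// ComputeLink encodes and hashes the node, returning a Link without storing anything.
	lnk, err := lsys.ComputeLink(lp, basicnode.NewString("hello"))
	if err != nil {
		panic(err)
	}
	fmt.Printf("computed link: %s\n", lnk)
}
```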