Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

introduce LinkSystem #143

Merged
merged 10 commits into from
Mar 12, 2021
6 changes: 3 additions & 3 deletions codec/dagcbor/multicodec.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"github.com/polydawn/refmt/cbor"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)

var (
Expand All @@ -15,8 +15,8 @@ var (
)

func init() {
codec.MulticodecEncoderRegistry[0x71] = Encode
codec.MulticodecDecoderRegistry[0x71] = Decode
multicodec.EncoderRegistry[0x71] = Encode
multicodec.DecoderRegistry[0x71] = Decode
}

func Decode(na ipld.NodeAssembler, r io.Reader) error {
Expand Down
2 changes: 1 addition & 1 deletion codec/dagcbor/roundtripCidlink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this not be inferred from the type?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

{ not ( :) it's an adapter type, not an interface cast.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think i was wanting the apparently deprecated '-1' to indicate default length because a lot of these hashes have a single length that makes sense, and i don't want to be remembering / fumble that sha1 should be 20 bytes while sha224 should be 28 bytes every time i make one of these.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, sorry, I thought the comment was on the diff line, since that's what github highlighted most brightly.

Yeah, agree. I wish the go-cid and/or go-multihash libraries were more friendly about this very common user story.

I think a -1 should flow through and do whatever go-multihash does, still. And I have no idea why that's deprecated, fwiw. (A lot of things in go-multihash seem deprecated without much comment on why or what to do instead. I think some review and renovation of that is overdue.)

It's slightly on the other side of where I'm cordoning my renovation today, though.

}}
lsys := cidlink.DefaultLinkSystem()
Expand Down
2 changes: 1 addition & 1 deletion codec/dagcbor/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TestRoundtripLinksAndBytes(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.

Expand Down
6 changes: 3 additions & 3 deletions codec/dagjson/multicodec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/polydawn/refmt/json"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
)

var (
Expand All @@ -16,8 +16,8 @@ var (
)

func init() {
codec.MulticodecEncoderRegistry[0x0129] = Encode
codec.MulticodecDecoderRegistry[0x0129] = Decode
multicodec.EncoderRegistry[0x0129] = Encode
multicodec.DecoderRegistry[0x0129] = Decode
}

func Decode(na ipld.NodeAssembler, r io.Reader) error {
Expand Down
4 changes: 2 additions & 2 deletions codec/dagjson/roundtripCidlink_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func TestRoundtripCidlink(t *testing.T) {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x0129,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}
lsys := cidlink.DefaultLinkSystem()
Expand Down Expand Up @@ -48,7 +48,7 @@ func TestUnmarshalTrickyMapContainingLink(t *testing.T) {
lnk := cidlink.LinkPrototype{cid.Prefix{
Version: 1,
Codec: 0x71,
MhType: 0x17,
MhType: 0x13,
MhLength: 4,
}}.BuildLink([]byte{1, 2, 3, 4}) // dummy value, content does not matter to this test.

Expand Down
9 changes: 5 additions & 4 deletions linking/cid/linksystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ import (
"hash"

"github.com/ipld/go-ipld-prime"
"github.com/ipld/go-ipld-prime/codec"
"github.com/ipld/go-ipld-prime/multicodec"
"github.com/ipld/go-ipld-prime/multihash"
)

func DefaultLinkSystem() ipld.LinkSystem {
return ipld.LinkSystem{
EncoderChooser: func(lp ipld.LinkPrototype) (ipld.Encoder, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecEncoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.EncoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no encoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
Expand All @@ -26,7 +27,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
lp := lnk.Prototype()
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MulticodecDecoderRegistry[lp2.GetCodec()]
fn, ok := multicodec.DecoderRegistry[lp2.GetCodec()]
if !ok {
return nil, fmt.Errorf("no decoder registered for multicodec indicator 0x%x", lp2.GetCodec())
}
Expand All @@ -38,7 +39,7 @@ func DefaultLinkSystem() ipld.LinkSystem {
HasherChooser: func(lp ipld.LinkPrototype) (hash.Hash, error) {
switch lp2 := lp.(type) {
case LinkPrototype:
fn, ok := codec.MultihashRegistry[lp2.MhType]
fn, ok := multihash.Registry[lp2.MhType]
if !ok {
return nil, fmt.Errorf("no hasher registered for multihash indicator 0x%x", lp2.MhType)
}
Expand Down
8 changes: 4 additions & 4 deletions linkingExamples_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ func ExampleStoringLink() {
lp := cidlink.LinkPrototype{cid.Prefix{
Version: 1, // Usually '1'.
Codec: 0x71, // 0x71 means "dag-cbor" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhType: 0x15, // 0x15 means "sha3-384" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 48, // sha3-384 hash has a 48-byte sum.
MhType: 0x13, // 0x13 means "sha2-512" -- See the multicodecs table: https://github.com/multiformats/multicodec/
MhLength: 64, // sha2-512 hash has a 64-byte sum.
}}

// And we need some data to link to! Here's a quick piece of example data:
Expand Down Expand Up @@ -85,13 +85,13 @@ func ExampleStoringLink() {
// We'll pick this data back up again in the example for loading.

// Output:
// link: bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta
// link: bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk
// concrete type: `cidlink.Link`
}

func ExampleLoadingLink() {
// Let's say we want to load this link (it's the same one we just created in the example above).
cid, _ := cid.Decode("bafyrkmbukvrgzcs6qlsh4wvkvbe5wp7sclcblfnapnb2xfznisbykpbnlocet2qzley3cpxofoxqrnqgm3ta")
cid, _ := cid.Decode("bafyrgqhai26anf3i7pips7q22coa4sz2fr4gk4q4sqdtymvvjyginfzaqewveaeqdh524nsktaq43j65v22xxrybrtertmcfxufdam3da3hbk")
lnk := cidlink.Link{cid}

// Let's get a LinkSystem. We're going to be working with CID links,
Expand Down
51 changes: 5 additions & 46 deletions codec/multicodecs.go → multicodec/multicodec.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,10 @@
package codec
package multicodec

import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"

"golang.org/x/crypto/sha3"

"github.com/ipld/go-ipld-prime"
)

// MulticodecEncoderRegistry is a simple map which maps a multicodec indicator number
// EncoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Encoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
Expand All @@ -29,9 +21,9 @@ import (
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
var EncoderRegistry = make(map[uint64]ipld.Encoder)

// MulticodecDecoderRegistry is a simple map which maps a multicodec indicator number
// DecoderRegistry is a simple map which maps a multicodec indicator number
// to an ipld.Decoder function.
//
// Packages which implement an IPLD codec and have a multicodec number reserved in
Expand All @@ -48,37 +40,4 @@ var MulticodecEncoderRegistry = make(map[uint64]ipld.Encoder)
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MulticodecDecoderRegistry = make(map[uint64]ipld.Decoder)

// MultihashRegistry is a simple map which maps a multihash indicator number
// to a standard golang Hash interface.
//
// Hashers which are available in the golang stdlib are registered here automatically.
// Some hashes from x/crypto are also included out-of-the-box.
//
// Packages which want to register more hashing functions and have a multihash number reserved in
// https://github.com/multiformats/multicodec/blob/master/table.csv
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find decoders
// to use when deserializing data from storage.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multicodec indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
var MultihashRegistry = make(map[uint64]func() hash.Hash)

func init() {
MultihashRegistry[0xd5] = md5.New
MultihashRegistry[0x11] = sha1.New
MultihashRegistry[0x12] = sha256.New
MultihashRegistry[0x13] = sha512.New
MultihashRegistry[0x14] = sha3.New512
MultihashRegistry[0x15] = sha3.New384
MultihashRegistry[0x16] = sha3.New256
MultihashRegistry[0x17] = sha3.New224
}
var DecoderRegistry = make(map[uint64]ipld.Decoder)
51 changes: 51 additions & 0 deletions multihash/errata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package multihash

import (
"bytes"
"crypto/sha256"
"hash"
)

// identityMultihash implements the "identity" multihash, whose "digest" is
// simply every byte written so far. Write and Reset are promoted from the
// embedded bytes.Buffer, so only a *identityMultihash is usable as a hasher.
type identityMultihash struct {
	bytes.Buffer
}

// BlockSize returns an arbitrary value.
//
// A preferred block size is nonsense for the "identity" "hash". An arbitrary
// but unsurprising and positive nonzero number has been chosen to minimize
// the odds of fascinating bugs.
func (*identityMultihash) BlockSize() int {
	return 32
}

// Size returns the number of bytes Sum will append, which for the identity
// function is the number of bytes written so far.
func (x *identityMultihash) Size() int {
	return x.Len()
}

// Sum appends the bytes written so far to digest and returns the resulting
// slice. The accumulated state is left untouched, and the returned slice
// never aliases the internal buffer, so callers may modify it freely.
func (x *identityMultihash) Sum(digest []byte) []byte {
	return append(digest, x.Bytes()...)
}

// doubleSha256 is a hasher whose digest is sha256(sha256(input)).
//
// The zero value is ready to use: the inner sha256 state is created lazily,
// so both &doubleSha256{} and &doubleSha256{main: sha256.New()} work.
type doubleSha256 struct {
	main hash.Hash // first-round sha256 state; nil until first use
}

// inner returns the first-round hasher, creating it on first use so that a
// zero-value doubleSha256 does not dereference a nil hash.Hash.
func (x *doubleSha256) inner() hash.Hash {
	if x.main == nil {
		x.main = sha256.New()
	}
	return x.main
}

// Write feeds body into the first-round sha256 state.
func (x *doubleSha256) Write(body []byte) (int, error) {
	return x.inner().Write(body)
}

// BlockSize returns the block size of the underlying sha256.
func (*doubleSha256) BlockSize() int {
	return sha256.BlockSize
}

// Size returns the digest length, which is that of a single sha256.
func (*doubleSha256) Size() int {
	return sha256.Size
}

// Reset discards everything written so far.
func (x *doubleSha256) Reset() {
	x.inner().Reset()
}

// Sum appends sha256(sha256(everything written so far)) to digest and
// returns the resulting slice. The accumulated state is left untouched.
func (x *doubleSha256) Sum(digest []byte) []byte {
	// hash.Hash.Sum APPENDS to its argument, so pass a zero-length slice
	// backed by a stack array to capture the first-round digest in place.
	var scratch [sha256.Size]byte
	first := x.inner().Sum(scratch[:0])
	second := sha256.Sum256(first)
	return append(digest, second[:]...)
}
48 changes: 48 additions & 0 deletions multihash/multihash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package multihash

import (
"crypto/md5"
"crypto/sha1"
"crypto/sha256"
"crypto/sha512"
"hash"
)

// Registry is a simple map which maps a multihash indicator number
// to a constructor function returning a standard golang hash.Hash.
//
// Multihash indicator numbers are reserved and described in
// https://github.com/multiformats/multicodec/blob/master/table.csv .
// The keys used in this map must match those reservations.
//
// Hashers which are available in the golang stdlib are registered here automatically.
//
// Packages which want to register more hashing functions (and have a multihash number reserved!)
// are encouraged to do so at package init time.
// (Doing this at package init time ensures this map can be accessed without race conditions.)
//
// The linking/cid.DefaultLinkSystem will use this map to find hashers
// to use when serializing data and computing links,
// and when loading data from storage and verifying its integrity.
//
// This registry map is only used for default behaviors.
// If you don't want to rely on it, you can always construct your own LinkSystem.
// (For this reason, there's no special effort made to detect conflicting registrations in this map.
// If more than one package registers for the same multihash indicator, and
// you somehow end up with both in your import tree, and yet care about which wins:
// then just don't use this registry anymore: make a LinkSystem that does what you need.)
// This should never be done to make behavior alterations
// (hash functions are well standardized and so is the multihash indicator table),
// but may be relevant if one is really itching to try out different hash implementations for performance reasons.
var Registry = make(map[uint64]func() hash.Hash)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reality is there are a bunch of 3rd party hashes that end up in the current multihash registry in ipfs world and won't conform to hash.Hash

How do we expect to support that backwards compatibility?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With all the terrible glue code I just wrote. ┐_(ツ)_┌

See the new gadgets in multihash/register/sha3 for handling Shake, for example.

(Like actually, review appreciated. I just utterly invented a BlockSize which is nonsensical. My theory is that it's harmless.)

I'll admit, I had kind of a discovery process today. I didn't realize how many things in golang don't conform to hash.Hash directly.

At the same time: I think still more things conform to hash.Hash than conform to anything else, so I think sticking to that as the centroid is probably the best option available.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

again - isn't the reality that we need to take existing 3rd party codecs currently being registered with multihash, and be able to keep them working after this transition? not "write a custom shim for each case" but be backwards compatible for them? If that grows to a week+ of work, it's less likely to make it into this refactor.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, "3rd party" in the sense of beyond-the-third-party-things-already-directly-depended-on-by-go-multicodec. Mmm. Okay, so, I think I had stated earlier that this was impossible, and I think I walk that back. On further look, it seems possible; I just didn't see how at first, because it requires multiple steps, not just the so-called register function in that library.

But I still suspect, looking over the flavor of some of the issues in that repo, and how issues and PRs seem to focus on getting new things added to the library, that the number of third-party registrations made in the wild is asymptotically close to zero.

So I think at the moment: we can functionally disregard the possibility. And if there are any applications out there which hit a speedbump here, they hit it when converting to go-ipld-prime... which is probably already in their roadmap as a speedbump, so there shouldn't be a category-change in speedbumpism, and so as a result I think everyone involved in such a hypothetical will probably survive.

Longer run: yeah, conversation is needed about how to re-reconcile this with go-multihash. I think a bigger venue than this PR is going to be needed for that, though.


// init registers the hashers that can be built from the golang standard
// library alone, keyed by their multihash indicator number.
func init() {
	Registry[0x00] = func() hash.Hash { return &identityMultihash{} }
	Registry[0xd5] = md5.New
	Registry[0x11] = sha1.New
	Registry[0x12] = sha256.New
	Registry[0x13] = sha512.New
	// Registry[0x1f] = sha256.New224 // SOON
	// Registry[0x20] = sha512.New384 // SOON
	// The inner sha256 state must be supplied explicitly: a doubleSha256 with
	// a nil main hasher would dereference nil on the first Write.
	Registry[0x56] = func() hash.Hash { return &doubleSha256{main: sha256.New()} }
}
23 changes: 23 additions & 0 deletions multihash/register/all/multihash_all.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
This package has no purpose except to perform registration of multihashes.

It is meant to be used as a side-effecting import, e.g.

import (
_ "github.com/ipld/go-ipld-prime/multihash/register/all"
)

This package registers many multihashes at once.
Importing it will increase the size of your dependency tree significantly.
It's recommended that you import this package if you're building some
kind of data broker application, which may need to handle many different kinds of hashes;
if you're building an application which you know only handles a specific hash,
importing this package may bloat your builds unnecessarily.
*/
package all

import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
_ "github.com/ipld/go-ipld-prime/multihash/register/murmur3"
_ "github.com/ipld/go-ipld-prime/multihash/register/sha3"
)
48 changes: 48 additions & 0 deletions multihash/register/blake2/multihash_blake2.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
This package has no purpose except to perform registration of multihashes.

It is meant to be used as a side-effecting import, e.g.

import (
_ "github.com/ipld/go-ipld-prime/multihash/register/blake2"
)

This package registers several multihashes for the blake2 family
(both the 's' and the 'b' variants, and in a variety of sizes).
*/
package blake2

import (
"hash"

"github.com/minio/blake2b-simd"
"golang.org/x/crypto/blake2s"

"github.com/ipld/go-ipld-prime/multihash"
)

// Multihash indicator numbers bounding the blake2 family.
// Within each range the offset from the *_MIN value selects the digest
// length: this package's init registers BLAKE2B_MIN+k as a blake2b hasher
// with a (k+1)-byte digest, and BLAKE2S_MIN+31 as the 32-byte blake2s hasher.
const (
// First blake2b indicator (1-byte digest as registered by init).
BLAKE2B_MIN = 0xb201
// Last blake2b indicator (64-byte digest as registered by init).
BLAKE2B_MAX = 0xb240
// First blake2s indicator.
BLAKE2S_MIN = 0xb241
// Last blake2s indicator.
BLAKE2S_MAX = 0xb260
)

// init registers the blake2 hashers in multihash.Registry.
func init() {
	// blake2s: only the 32-byte (256-bit) variant is enabled by this package.
	multihash.Registry[BLAKE2S_MIN+31] = func() hash.Hash {
		// The error result is ignored here; no key is passed.
		hasher, _ := blake2s.New256(nil)
		return hasher
	}

	// blake2b: one indicator per digest length across the whole range.
	// The indicator's offset from BLAKE2B_MIN, plus one, is the digest length in bytes.
	for code := uint64(BLAKE2B_MIN); code <= BLAKE2B_MAX; code++ {
		// Declared inside the loop body so each closure captures its own value.
		digestLen := int(code - BLAKE2B_MIN + 1)
		multihash.Registry[code] = func() hash.Hash {
			cfg := &blake2b.Config{Size: uint8(digestLen)}
			h, err := blake2b.New(cfg)
			if err != nil {
				panic(err)
			}
			return h
		}
	}
}
Loading