diff --git a/cmd/car/car.go b/cmd/car/car.go index 2e6ee232..43731362 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -16,10 +16,23 @@ func main1() int { Usage: "Utility for working with car files", Commands: []*cli.Command{ { - Name: "create", - Usage: "Create a car file", - Aliases: []string{"c"}, - Action: CreateCar, + Name: "convert", + Usage: "Convert a car file to given codec", + Aliases: []string{"con"}, + Action: ConvertCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "selector", + Aliases: []string{"s"}, + Usage: "A selector over the dag", + }, + }, + }, + { + Name: "concatinate", + Usage: "Concatinate car files", + Aliases: []string{"cat"}, + Action: CatCar, Flags: []cli.Flag{ &cli.StringFlag{ Name: "file", @@ -35,15 +48,21 @@ func main1() int { }, }, { - Name: "convert", - Usage: "Convert a car file to given codec", - Aliases: []string{"con"}, - Action: ConvertCar, + Name: "create", + Usage: "Create a car file", + Aliases: []string{"c"}, + Action: CreateCar, Flags: []cli.Flag{ &cli.StringFlag{ - Name: "selector", - Aliases: []string{"s"}, - Usage: "A selector over the dag", + Name: "file", + Aliases: []string{"f", "output", "o"}, + Usage: "The car file to write to", + TakesFile: true, + }, + &cli.IntFlag{ + Name: "version", + Value: 2, + Usage: "Write output as a v1 or v2 format car", }, }, }, @@ -122,6 +141,19 @@ func main1() int { }, }, }, + { + Name: "import", + Usage: "Import a block into a car file", + Action: ImportCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "codec", + Aliases: []string{"c"}, + Usage: "The codec the block data should be interpreted with", + Value: multicodec.DagJson.String(), + }, + }, + }, { Name: "index", Aliases: []string{"i"}, @@ -143,7 +175,7 @@ func main1() int { }, { Name: "list", - Aliases: []string{"l"}, + Aliases: []string{"l", "ls"}, Usage: "List the CIDs in a car", Action: ListCar, Flags: []cli.Flag{ diff --git a/cmd/car/concatinate.go b/cmd/car/concatinate.go new file mode 100644 index 
00000000..eb068d5c
--- /dev/null
+++ b/cmd/car/concatinate.go
@@ -0,0 +1,93 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/ipld/go-car/v2"
+	"github.com/ipld/go-car/v2/blockstore"
+	"github.com/urfave/cli/v2"
+)
+
+// CatCar concatenates the blocks from a set of source car files into a
+// combined destination car file.
+// The roots of the destination car are the roots of the last specified source car.
+//
+// Sources are the positional arguments; the destination path comes from the
+// "file" flag and the output format (CARv1 or CARv2) from the "version" flag.
+func CatCar(c *cli.Context) error {
+	if c.Args().Len() == 0 {
+		return fmt.Errorf("at least one source file must be specified")
+	}
+
+	if !c.IsSet("file") {
+		return fmt.Errorf("a file destination must be specified")
+	}
+
+	options := []car.Option{}
+	switch c.Int("version") {
+	case 1:
+		options = []car.Option{blockstore.WriteAsCarV1(true)}
+	case 2:
+		// already the default
+	default:
+		return fmt.Errorf("invalid CAR version %d", c.Int("version"))
+	}
+
+	// Peek at the roots of the last source: they become the destination roots.
+	lst := c.Args().Get(c.Args().Len() - 1)
+	lstStore, err := blockstore.OpenReadOnly(lst)
+	if err != nil {
+		return err
+	}
+	roots, err := lstStore.Roots()
+	// Close before inspecting err so the handle is released on both paths;
+	// the store was only read from, so a close error is not actionable.
+	_ = lstStore.Close()
+	if err != nil {
+		return err
+	}
+
+	cdest, err := blockstore.OpenReadWrite(c.String("file"), roots, options...)
+	if err != nil {
+		return err
+	}
+
+	for _, src := range c.Args().Slice() {
+		if err := appendCarBlocks(c, cdest, src); err != nil {
+			return fmt.Errorf("concatenating %s: %w", src, err)
+		}
+	}
+
+	return cdest.Finalize()
+}
+
+// appendCarBlocks copies every block of the car file at src into dst.
+func appendCarBlocks(c *cli.Context, dst *blockstore.ReadWrite, src string) error {
+	f, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	// The file is only read, so a close error cannot lose data; deferring
+	// guarantees the handle is released on every return path.
+	defer f.Close()
+
+	blkRdr, err := car.NewBlockReader(f)
+	if err != nil {
+		return err
+	}
+	for {
+		blk, err := blkRdr.Next()
+		if err == io.EOF {
+			return nil
+		}
+		if err != nil {
+			// Check before Put: blk is not valid when Next fails.
+			return err
+		}
+		if err := dst.Put(c.Context, blk); err != nil {
+			return err
+		}
+	}
+}
diff --git a/cmd/car/convert.go b/cmd/car/convert.go
index 5322a434..1bf37aa5 100644
--- a/cmd/car/convert.go
+++ b/cmd/car/convert.go
@@ -20,6 +20,7 @@ import (
 	"github.com/ipld/go-ipld-prime/traversal"
 	"github.com/ipld/go-ipld-prime/traversal/selector"
 	selectorParser "github.com/ipld/go-ipld-prime/traversal/selector/parse"
+	"github.com/multiformats/go-multicodec"
 	"github.com/multiformats/go-multihash"
 	"github.com/urfave/cli/v2"
 )
@@ -48,8 +49,8 @@ func proxyCid(proto cidlink.LinkPrototype) (cid.Cid, error) {
 
 // ConvertCar will will re-write the blocks in a car to a specified codec.
func ConvertCar(c *cli.Context) error { - if c.Args().Len() < 3 { - return fmt.Errorf("Usage: convert ") + if c.Args().Len() < 2 { + return fmt.Errorf("Usage: convert [codec]") } output := c.Args().Get(1) @@ -59,8 +60,18 @@ func ConvertCar(c *cli.Context) error { } _ = os.Remove(output) + convertTo := multicodec.DagJson + codec := "" + if c.Args().Len() > 2 { + codec = c.Args().Get(2) + } + for _, candidate := range multicodec.KnownCodes() { + if candidate.String() == codec { + convertTo = candidate + } + } proto := cidlink.LinkPrototype{ - Prefix: cid.NewPrefixV1(cid.DagCBOR, multihash.SHA2_256), + Prefix: cid.NewPrefixV1(uint64(convertTo), multihash.SHA2_256), } p, err := proxyCid(proto) if err != nil { @@ -171,7 +182,8 @@ func ConvertCar(c *cli.Context) error { } return nil }) - workMap[blkCid] = &children{t: 0, old: old, new: make([]cid.Cid, len(old))} + child := children{t: 0, done: false, old: old, new: make([]cid.Cid, len(old))} + workMap[blkCid] = &child } // Step 3: for nodes with no-uncoverted children, transform the node, and convert. @@ -180,27 +192,37 @@ func ConvertCar(c *cli.Context) error { for done < len(workMap) { for c := range workMap { if workMap[c].t == len(workMap[c].old) && !workMap[c].done { - // Step 3.1: transform the node using old->new map + v := workMap[c] + var newRoot ipld.Node lnk := cidlink.Link{Cid: c} ns, _ = nsc(lnk, ipld.LinkContext{}) oldRoot, err := ls.Load(ipld.LinkContext{}, lnk, ns) if err != nil { return err } - newRoot, err := traversal.WalkTransforming(oldRoot, xar, func(p traversal.Progress, n datamodel.Node) (datamodel.Node, error) { - if n.Kind() == datamodel.Kind_Link { - nlk, _ := n.AsLink() - oldCid := nlk.(cidlink.Link).Cid - for i, c := range workMap[c].old { - if c.Equals(oldCid) { - newLk := basicnode.NewLink(cidlink.Link{Cid: workMap[c].new[i]}) - return newLk, nil + if len(v.old) == 0 { + // shortcut on leaf nodes. 
+ newRoot = oldRoot + } else { + // Step 3.1: transform the node using old->new map + newRoot, err = traversal.WalkTransforming(oldRoot, xar, func(p traversal.Progress, n datamodel.Node) (datamodel.Node, error) { + if n.Kind() == datamodel.Kind_Link { + nlk, _ := n.AsLink() + oldCid := nlk.(cidlink.Link).Cid + for i, c := range v.old { + if c.Equals(oldCid) { + newLk := basicnode.NewLink(cidlink.Link{Cid: v.new[i]}) + return newLk, nil + } } + return nil, fmt.Errorf("could not find link %s in workmap: %v", oldCid, v.old) } - return nil, fmt.Errorf("could not find link %s in workmap", oldCid) + return n, nil + }) + if err != nil { + return err } - return n, nil - }) + } // Step 3.2: serialize into output datastore newLnk, err := outls.Store(ipld.LinkContext{}, proto, newRoot) if err != nil { @@ -211,9 +233,9 @@ func ConvertCar(c *cli.Context) error { // Step 3.3: update workmap indicating parents should transform this child. for d := range workMap { for i, o := range workMap[d].old { - if o == newCid { - workMap[d].new[i] = newCid - workMap[d].t++ + if o.Equals(c) { + (*workMap[d]).new[i] = newCid + (*workMap[d]).t++ } } } diff --git a/cmd/car/create.go b/cmd/car/create.go index de9f6abd..bfa988ab 100644 --- a/cmd/car/create.go +++ b/cmd/car/create.go @@ -32,16 +32,12 @@ func CreateCar(c *cli.Context) error { } // make a cid with the right length that we eventually will patch with the root. 
-	hasher, err := multihash.GetHasher(multihash.SHA2_256)
+	proxyRoot, err := proxyCid(cidlink.LinkPrototype{
+		Prefix: cid.NewPrefixV1(uint64(multicodec.DagPb), multihash.SHA2_256),
+	})
 	if err != nil {
 		return err
 	}
-	digest := hasher.Sum([]byte{})
-	hash, err := multihash.Encode(digest, multihash.SHA2_256)
-	if err != nil {
-		return err
-	}
-	proxyRoot := cid.NewCidV1(uint64(multicodec.DagPb), hash)
 
 	options := []car.Option{}
 	switch c.Int("version") {
diff --git a/cmd/car/import.go b/cmd/car/import.go
new file mode 100644
index 00000000..4bcdaa91
--- /dev/null
+++ b/cmd/car/import.go
@@ -0,0 +1,89 @@
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/ipfs/go-cid"
+	"github.com/ipld/go-car/v2"
+	cidlink "github.com/ipld/go-ipld-prime/linking/cid"
+	"github.com/ipld/go-ipld-prime/storage/memstore"
+	selectorparse "github.com/ipld/go-ipld-prime/traversal/selector/parse"
+	"github.com/multiformats/go-multicodec"
+	"github.com/multiformats/go-multihash"
+	"github.com/urfave/cli/v2"
+)
+
+// ImportCar packages a single block of data into a one-block car file.
+//
+// The data is read from the file named by the first argument, or from stdin
+// when that argument is absent or "-". The block's CID uses the codec named by
+// the "codec" flag with a SHA2-256 multihash. The car is written to the file
+// named by the second argument, or to stdout when it is absent.
+func ImportCar(c *cli.Context) error {
+	var (
+		data []byte
+		err  error
+	)
+	if c.Args().Len() >= 1 && c.Args().First() != "-" {
+		// os.ReadFile opens, reads and closes the file, so no handle leaks.
+		data, err = os.ReadFile(c.Args().First())
+	} else {
+		data, err = io.ReadAll(os.Stdin)
+	}
+	if err != nil {
+		return err
+	}
+
+	// Resolve the codec by name; reject unknown names instead of silently
+	// producing a block tagged with a codec the caller did not ask for.
+	codecName := c.String("codec")
+	var convertTo multicodec.Code
+	known := false
+	for _, candidate := range multicodec.KnownCodes() {
+		if candidate.String() == codecName {
+			convertTo = candidate
+			known = true
+			break
+		}
+	}
+	if !known {
+		return fmt.Errorf("unknown codec %q", codecName)
+	}
+
+	proto := cid.Prefix{
+		Version:  1,
+		Codec:    uint64(convertTo),
+		MhType:   multihash.SHA2_256,
+		MhLength: -1,
+	}
+	root, err := proto.Sum(data)
+	if err != nil {
+		return err
+	}
+
+	// Serve the block from memory so the traversal below can load it.
+	ls := cidlink.DefaultLinkSystem()
+	store := memstore.Store{}
+	if err := store.Put(c.Context, root.KeyString(), data); err != nil {
+		return err
+	}
+	ls.SetReadStorage(&store)
+
+	outStream := os.Stdout
+	if c.Args().Len() >= 2 {
+		outStream, err = os.Create(c.Args().Get(1))
+		if err != nil {
+			return err
+		}
+	}
+	_, err = car.TraverseV1(c.Context, &ls, root, selectorparse.CommonSelector_MatchPoint, outStream)
+	if outStream != os.Stdout {
+		// Surface a failed close: it can mean the car was not fully written.
+		if cerr := outStream.Close(); err == nil {
+			err = cerr
+		}
+	}
+	return err
+}
diff --git a/cmd/car/testdata/script/concatinate.txt b/cmd/car/testdata/script/concatinate.txt
new file mode 100644
index 00000000..dbe5f039
--- /dev/null
+++ b/cmd/car/testdata/script/concatinate.txt
@@ -0,0 +1,23 @@
+stdin filteredroot.txt
+car filter ${INPUTS}/sample-wrapped-v2.car out.car
+! stderr .
+car list out.car
+! stderr .
+cmp stdout filteredroot.txt
+
+stdin filteredcids.txt
+car filter ${INPUTS}/sample-wrapped-v2.car out-2.car
+car list out-2.car
+! stderr .
+cmp stdout filteredcids.txt
+
+car concatinate -f combined.car out.car out-2.car
+! stderr .
+car list combined.car +stdout -count=3 '^bafy' + +-- filteredroot.txt -- +bafy2bzaced4ueelaegfs5fqu4tzsh6ywbbpfk3cxppupmxfdhbpbhzawfw5oy +-- filteredcids.txt -- +bafy2bzacebohz654namrgmwjjx4qmtwgxixsd7pn4tlanyrc3g3hwj75hlxrw +bafy2bzaceaqtiesyfqd2jibmofz22oolguzf5wscwh73rmeypglfu2xhkptri \ No newline at end of file diff --git a/cmd/car/testdata/script/convert.txt b/cmd/car/testdata/script/convert.txt new file mode 100644 index 00000000..cff948ad --- /dev/null +++ b/cmd/car/testdata/script/convert.txt @@ -0,0 +1,4 @@ +car convert ${INPUTS}/sample-wrapped-v2.car out.car dagjson +! stderr . +car list out.car +stdout -count=1049 '^baguq' diff --git a/cmd/car/testdata/script/import.txt b/cmd/car/testdata/script/import.txt new file mode 100644 index 00000000..2d483011 --- /dev/null +++ b/cmd/car/testdata/script/import.txt @@ -0,0 +1,11 @@ +env FOO_CID='bafkreicgzc7pgvw5mdtsfboafwkqtsdmtyxi2hv5if6uifq6z6pwtmjira' + +stdin foo.txt +car import -c raw - out.car +car ls out.car +stdout -count=1 '^bafk' +car gb out.car $FOO_CID +cmp stdout foo.txt + +-- foo.txt -- +foo content \ No newline at end of file