Skip to content

Commit

Permalink
feat(gateway): TAR response format (#9029)
Browse files Browse the repository at this point in the history
Implementation of IPIP-288 (ipfs/specs#288)

Co-authored-by: Marcin Rataj <[email protected]>
  • Loading branch information
hacdias and lidel authored Nov 9, 2022
1 parent 3574cae commit a210abd
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 12 deletions.
18 changes: 13 additions & 5 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
carVersion := formatParams["version"]
i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin)
return
case "application/x-tar":
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
default: // catch-all for unsuported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -842,9 +846,10 @@ func getEtag(r *http.Request, cid cid.Cid) string {
responseFormat, _, err := customResponseFormat(r)
if err == nil && responseFormat != "" {
// application/vnd.ipld.foo → foo
f := responseFormat[strings.LastIndex(responseFormat, ".")+1:]
// Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + f + suffix
// application/x-bar → x-bar
shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:]
// Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + shortFormat + suffix
}
// TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands
return prefix + cid.String() + suffix
Expand All @@ -859,14 +864,17 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.raw", nil, nil
case "car":
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
// Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
// We only care about explciit, vendor-specific content-types.
// We only care about explicit, vendor-specific content-types.
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") {
if strings.HasPrefix(accept, "application/vnd.ipld") ||
strings.HasPrefix(accept, "application/x-tar") {
mediatype, params, err := mime.ParseMediaType(accept)
if err != nil {
return "", nil, err
Expand Down
92 changes: 92 additions & 0 deletions core/corehttp/gateway_handler_tar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package corehttp

import (
"context"
"html"
"net/http"
"time"

files "github.com/ipfs/go-ipfs-files"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/tracing"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

var unixEpochTime = time.Unix(0, 0)

func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) {
ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", trace.WithAttributes(attribute.String("path", resolvedPath.String())))
defer span.End()

ctx, cancel := context.WithCancel(ctx)
defer cancel()

// Get Unixfs file
file, err := i.api.Unixfs().Get(ctx, resolvedPath)
if err != nil {
webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusBadRequest)
return
}
defer file.Close()

rootCid := resolvedPath.Cid()

// Set Cache-Control and read optional Last-Modified time
modtime := addCacheControlHeaders(w, r, contentPath, rootCid)

// Weak Etag W/ because we can't guarantee byte-for-byte identical
// responses, but still want to benefit from HTTP Caching. Two TAR
// responses for the same CID will be logically equivalent,
// but when TAR is streamed, then in theory, files and directories
// may arrive in different order (depends on TAR lib and filesystem/inodes).
etag := `W/` + getEtag(r, rootCid)
w.Header().Set("Etag", etag)

// Finish early if Etag match
if r.Header.Get("If-None-Match") == etag {
w.WriteHeader(http.StatusNotModified)
return
}

// Set Content-Disposition
var name string
if urlFilename := r.URL.Query().Get("filename"); urlFilename != "" {
name = urlFilename
} else {
name = rootCid.String() + ".tar"
}
setContentDispositionHeader(w, name, "attachment")

// Construct the TAR writer
tarw, err := files.NewTarWriter(w)
if err != nil {
webError(w, "could not build tar writer", err, http.StatusInternalServerError)
return
}
defer tarw.Close()

// Sets correct Last-Modified header. This code is borrowed from the standard
// library (net/http/server.go) as we cannot use serveFile without throwing the entire
// TAR into the memory first.
if !(modtime.IsZero() || modtime.Equal(unixEpochTime)) {
w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
}

w.Header().Set("Content-Type", "application/x-tar")
w.Header().Set("X-Content-Type-Options", "nosniff") // no funny business in the browsers :^)

// The TAR has a top-level directory (or file) named by the CID.
if err := tarw.WriteFile(file, rootCid.String()); err != nil {
w.Header().Set("X-Stream-Error", err.Error())
// Trailer headers do not work in web browsers
// (see https://github.com/mdn/browser-compat-data/issues/14703)
// and we have limited options around error handling in browser contexts.
// To improve UX/DX, we finish response stream with error message, allowing client to
// (1) detect error by having corrupted TAR
// (2) be able to reason what went wrong by instecting the tail of TAR stream
_, _ = w.Write([]byte(err.Error()))
return
}
}
21 changes: 20 additions & 1 deletion docs/changelogs/v0.17.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,34 @@ Below is an outline of all that is in this release, so you get a sense of all th
- [Kubo changelog v0.17](#kubo-changelog-v017)
- [v0.17.0](#v0170)
- [Overview](#overview)
- [TOC](#toc)
- [🔦 Highlights](#-highlights)
- [TAR Response Format on Gateways](#tar-response-format-on-gateways)
- [Changelog](#changelog)
- [Contributors](#contributors)


### 🔦 Highlights

<!-- TODO -->

#### TAR Response Format on Gateways

Implemented [IPIP-288](https://github.com/ipfs/specs/pull/288) which adds
support for requesting deserialized UnixFS directory as a TAR stream.

HTTP clients can request TAR response by passing the `?format=tar` URL
parameter, or setting `Accept: application/x-tar` HTTP header:

```console
$ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
$ curl -H "Accept: application/x-tar" "http://127.0.0.1:8080/ipfs/$DIR_CID" > dir.tar
$ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=tar" | tar xv
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - alt.txt
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - transcript.txt
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1.png
```

### Changelog

<!-- TODO -->
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go 1.17
replace github.com/ipfs/kubo => ./../../..

require (
github.com/ipfs/go-ipfs-files v0.1.1
github.com/ipfs/go-ipfs-files v0.2.0
github.com/ipfs/interface-go-ipfs-core v0.7.0
github.com/ipfs/kubo v0.14.0-rc1
github.com/libp2p/go-libp2p v0.23.2
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/kubo-as-a-library/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY
github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s=
github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4=
github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs=
github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324=
github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM=
github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8=
github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M=
github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU=
github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo=
github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY=
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ require (
github.com/ipfs/go-ipfs-cmds v0.8.1
github.com/ipfs/go-ipfs-exchange-interface v0.2.0
github.com/ipfs/go-ipfs-exchange-offline v0.3.0
github.com/ipfs/go-ipfs-files v0.1.1
github.com/ipfs/go-ipfs-files v0.2.0
github.com/ipfs/go-ipfs-keystore v0.0.2
github.com/ipfs/go-ipfs-pinner v0.2.1
github.com/ipfs/go-ipfs-posinfo v0.0.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY
github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s=
github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4=
github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs=
github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324=
github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM=
github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8=
github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M=
github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU=
github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo=
github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY=
Expand Down
Binary file added test/sharness/t0122-gateway-tar-data/inside-root.car
Binary file not shown.
Binary file not shown.
91 changes: 91 additions & 0 deletions test/sharness/t0122-gateway-tar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env bash

test_description="Test HTTP Gateway TAR (application/x-tar) Support"

. lib/test-lib.sh

test_init_ipfs
test_launch_ipfs_daemon_without_network

OUTSIDE_ROOT_CID="bafybeicaj7kvxpcv4neaqzwhrqqmdstu4dhrwfpknrgebq6nzcecfucvyu"
INSIDE_ROOT_CID="bafybeibfevfxlvxp5vxobr5oapczpf7resxnleb7tkqmdorc4gl5cdva3y"

test_expect_success "Add the test directory" '
mkdir -p rootDir/ipfs &&
mkdir -p rootDir/ipns &&
mkdir -p rootDir/api &&
mkdir -p rootDir/ą/ę &&
echo "I am a txt file on path with utf8" > rootDir/ą/ę/file-źł.txt &&
echo "I am a txt file in confusing /api dir" > rootDir/api/file.txt &&
echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt &&
echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt &&
DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) &&
FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) &&
FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size)
echo "$FILE_CID / $FILE_SIZE"
'

test_expect_success "GET TAR with format=tar and extract" '
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x
'

test_expect_success "GET TAR with 'Accept: application/x-tar' and extract" '
curl -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" | tar -x
'

test_expect_success "GET TAR with format=tar has expected Content-Type" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" > curl_output_filename 2>&1 &&
test_should_contain "Content-Disposition: attachment;" curl_output_filename &&
test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename &&
test_should_contain "Content-Type: application/x-tar" curl_output_filename
'

test_expect_success "GET TAR with 'Accept: application/x-tar' has expected Content-Type" '
curl -sD - -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output_filename 2>&1 &&
test_should_contain "Content-Disposition: attachment;" curl_output_filename &&
test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename &&
test_should_contain "Content-Type: application/x-tar" curl_output_filename
'

test_expect_success "GET TAR has expected root file" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x -C outputDir &&
test -f "outputDir/$FILE_CID" &&
echo "I am a txt file on path with utf8" > expected &&
test_cmp expected outputDir/$FILE_CID
'

test_expect_success "GET TAR has expected root directory" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=tar" | tar -x -C outputDir &&
test -d "outputDir/$DIR_CID" &&
echo "I am a txt file on path with utf8" > expected &&
test_cmp expected outputDir/$DIR_CID/ą/ę/file-źł.txt
'

test_expect_success "GET TAR with explicit ?filename= succeeds with modified Content-Disposition header" "
curl -fo actual -D actual_headers 'http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?filename=testтест.tar&format=tar' &&
grep -F 'Content-Disposition: attachment; filename=\"test____.tar\"; filename*=UTF-8'\'\''test%D1%82%D0%B5%D1%81%D1%82.tar' actual_headers
"

test_expect_success "Add CARs with relative paths to test with" '
ipfs dag import ../t0122-gateway-tar-data/outside-root.car > import_output &&
test_should_contain $OUTSIDE_ROOT_CID import_output &&
ipfs dag import ../t0122-gateway-tar-data/inside-root.car > import_output &&
test_should_contain $INSIDE_ROOT_CID import_output
'

test_expect_success "GET TAR with relative paths outside root fails" '
curl -o - "http://127.0.0.1:$GWAY_PORT/ipfs/$OUTSIDE_ROOT_CID?format=tar" > curl_output_filename &&
test_should_contain "relative UnixFS paths outside the root are now allowed" curl_output_filename
'

test_expect_success "GET TAR with relative paths inside root works" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$INSIDE_ROOT_CID?format=tar" | tar -x -C outputDir &&
test -f outputDir/$INSIDE_ROOT_CID/foobar/file
'

test_kill_ipfs_daemon

test_done

0 comments on commit a210abd

Please sign in to comment.