Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(gateway): TAR response format #9029

Merged
merged 30 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
de53536
feat: gateway support for tar
hacdias Jun 9, 2022
f79610a
fix: kubo namings
hacdias Aug 15, 2022
e8180ea
fix: accept 'Accept: application/x-tar' header
hacdias Oct 3, 2022
c5b773f
test: add tar file tests with accept header
hacdias Oct 3, 2022
4d232c6
wip: basepath tar writer
hacdias Oct 4, 2022
fd195f3
test: separate tar testfile§
hacdias Oct 4, 2022
72af6b2
wip: base dir tar writer
hacdias Oct 4, 2022
97d2023
wip: remove duplicate code, cleanup docs and text
hacdias Oct 5, 2022
6165b62
wip: go mod tidy
hacdias Oct 5, 2022
3a228e4
chore: go mod tidy
hacdias Oct 6, 2022
31ee4e5
wip: add inside and outside root cars for testing
hacdias Oct 6, 2022
766fa01
test: cleanup
hacdias Oct 6, 2022
56798ed
tests: test with relative paths inside and outside directory
hacdias Oct 6, 2022
0d6fad1
test: add missing line
hacdias Oct 6, 2022
51bf572
tar: force close connection upon error
hacdias Oct 7, 2022
8eee493
style: wrap line
hacdias Oct 10, 2022
e6b099a
test: use test_should_contain
hacdias Oct 10, 2022
f792fdb
test: simplify test and s/HASH/CID/g
hacdias Oct 10, 2022
2059d36
fix: add CID as top-level name for consistency
hacdias Oct 10, 2022
bc410af
test: add utf-8 testing and root directory/file name check
hacdias Oct 10, 2022
bd4fd1f
fix: return 400 if not unixfs
hacdias Oct 10, 2022
5687897
test: check if file exists and is on correct directory
hacdias Oct 10, 2022
7a352a9
refactor: use abort handler from #9333
hacdias Oct 13, 2022
be7cf70
refactor: print error instead of aborting
hacdias Nov 8, 2022
0a54117
ignore w.Write error as not much can be done now
hacdias Nov 8, 2022
8870480
update to go-ipfs-files@619bbe4
hacdias Nov 8, 2022
5386136
go mod tidy
hacdias Nov 8, 2022
7411e70
chore: go-ipfs-files v0.2.0
lidel Nov 9, 2022
4ca3940
refactor: cleanup Etag handling with TAR
lidel Nov 9, 2022
ea6bb9a
docs: add TAR to changelog
lidel Nov 9, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions core/corehttp/gateway_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,10 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request
carVersion := formatParams["version"]
i.serveCAR(r.Context(), w, r, resolvedPath, contentPath, carVersion, begin)
return
case "application/x-tar":
logger.Debugw("serving tar file", "path", contentPath)
i.serveTAR(r.Context(), w, r, resolvedPath, contentPath, begin, logger)
return
default: // catch-all for unsuported application/vnd.*
err := fmt.Errorf("unsupported format %q", responseFormat)
webError(w, "failed respond with requested content type", err, http.StatusBadRequest)
Expand Down Expand Up @@ -842,9 +846,10 @@ func getEtag(r *http.Request, cid cid.Cid) string {
responseFormat, _, err := customResponseFormat(r)
if err == nil && responseFormat != "" {
// application/vnd.ipld.foo → foo
f := responseFormat[strings.LastIndex(responseFormat, ".")+1:]
// Etag: "cid.foo" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + f + suffix
// application/x-bar → x-bar
shortFormat := responseFormat[strings.LastIndexAny(responseFormat, "/.")+1:]
// Etag: "cid.shortFmt" (gives us nice compression together with Content-Disposition in block (raw) and car responses)
suffix = `.` + shortFormat + suffix
}
// TODO: include selector suffix when https://github.com/ipfs/kubo/issues/8769 lands
return prefix + cid.String() + suffix
Expand All @@ -859,14 +864,17 @@ func customResponseFormat(r *http.Request) (mediaType string, params map[string]
return "application/vnd.ipld.raw", nil, nil
case "car":
return "application/vnd.ipld.car", nil, nil
case "tar":
return "application/x-tar", nil, nil
}
}
// Browsers and other user agents will send Accept header with generic types like:
// Accept:text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8
// We only care about explciit, vendor-specific content-types.
// We only care about explicit, vendor-specific content-types.
for _, accept := range r.Header.Values("Accept") {
// respond to the very first ipld content type
if strings.HasPrefix(accept, "application/vnd.ipld") {
if strings.HasPrefix(accept, "application/vnd.ipld") ||
strings.HasPrefix(accept, "application/x-tar") {
mediatype, params, err := mime.ParseMediaType(accept)
if err != nil {
return "", nil, err
Expand Down
92 changes: 92 additions & 0 deletions core/corehttp/gateway_handler_tar.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package corehttp

import (
"context"
"html"
"net/http"
"time"

files "github.com/ipfs/go-ipfs-files"
ipath "github.com/ipfs/interface-go-ipfs-core/path"
"github.com/ipfs/kubo/tracing"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"go.uber.org/zap"
)

// unixEpochTime is the Unix epoch (zero seconds, zero nanoseconds).
// serveTAR compares the modification time against it and omits the
// Last-Modified header when the two are equal.
var unixEpochTime = time.Unix(0, 0)

// serveTAR streams the UnixFS content behind resolvedPath to w as a TAR
// archive whose single top-level entry is named after the root CID.
//
// Before the archive is streamed it sets the caching headers, a weak Etag,
// Content-Disposition (honouring an optional ?filename= query parameter),
// Last-Modified (when a usable modification time is known), Content-Type and
// X-Content-Type-Options. A request whose If-None-Match equals the computed
// Etag is answered with 304 Not Modified and no body.
//
// The begin and logger parameters are accepted but not used by this body.
func (i *gatewayHandler) serveTAR(ctx context.Context, w http.ResponseWriter, r *http.Request, resolvedPath ipath.Resolved, contentPath ipath.Path, begin time.Time, logger *zap.SugaredLogger) {
	ctx, span := tracing.Span(ctx, "Gateway", "ServeTAR", trace.WithAttributes(attribute.String("path", resolvedPath.String())))
	defer span.End()

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Fetch the UnixFS node that will become the content of the archive.
	node, err := i.api.Unixfs().Get(ctx, resolvedPath)
	if err != nil {
		webError(w, "ipfs cat "+html.EscapeString(contentPath.String()), err, http.StatusBadRequest)
		return
	}
	defer node.Close()

	root := resolvedPath.Cid()

	// Cache-Control is set here; the returned time (if any) feeds Last-Modified below.
	modtime := addCacheControlHeaders(w, r, contentPath, root)

	// The Etag is deliberately weak (W/ prefix): two TAR responses for the
	// same CID are logically equivalent, yet a streamed TAR may, in theory,
	// list files and directories in a different order (this depends on the
	// TAR library and filesystem/inodes), so byte-for-byte identity cannot be
	// promised. HTTP caching still benefits from the weak validator.
	weakEtag := `W/` + getEtag(r, root)
	w.Header().Set("Etag", weakEtag)

	// Nothing more to send when the client already holds this representation.
	if r.Header.Get("If-None-Match") == weakEtag {
		w.WriteHeader(http.StatusNotModified)
		return
	}

	// Content-Disposition: an explicit ?filename= wins, otherwise "<cid>.tar".
	filename := r.URL.Query().Get("filename")
	if filename == "" {
		filename = root.String() + ".tar"
	}
	setContentDispositionHeader(w, filename, "attachment")

	// Build the TAR writer directly on top of the response writer.
	tw, err := files.NewTarWriter(w)
	if err != nil {
		webError(w, "could not build tar writer", err, http.StatusInternalServerError)
		return
	}
	defer tw.Close()

	// Last-Modified is set by hand, mirroring what the standard library does
	// in net/http/server.go: serveFile cannot be used here without first
	// buffering the entire TAR in memory.
	if !modtime.IsZero() && !modtime.Equal(unixEpochTime) {
		w.Header().Set("Last-Modified", modtime.UTC().Format(http.TimeFormat))
	}

	w.Header().Set("Content-Type", "application/x-tar")
	w.Header().Set("X-Content-Type-Options", "nosniff") // keep browsers from sniffing the body

	// Stream the archive; its top-level directory (or file) is named by the CID.
	if err := tw.WriteFile(node, root.String()); err != nil {
		// NOTE(review): presumably this header only reaches the client if no
		// part of the response has been sent yet — confirm.
		w.Header().Set("X-Stream-Error", err.Error())
		// Trailer headers do not work in web browsers
		// (see https://github.com/mdn/browser-compat-data/issues/14703),
		// which leaves few options for reporting errors in browser contexts.
		// To improve UX/DX the response stream is finished with the error
		// message, so that a client can
		// (1) detect the failure because the TAR is corrupted, and
		// (2) work out what went wrong by inspecting the tail of the TAR stream.
		// The Write error is ignored: nothing more can be done at this point.
		_, _ = w.Write([]byte(err.Error()))
		return
	}
}
21 changes: 20 additions & 1 deletion docs/changelogs/v0.17.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,34 @@ Below is an outline of all that is in this release, so you get a sense of all th
- [Kubo changelog v0.17](#kubo-changelog-v017)
- [v0.17.0](#v0170)
- [Overview](#overview)
- [TOC](#toc)
- [🔦 Highlights](#-highlights)
- [TAR Response Format on Gateways](#tar-response-format-on-gateways)
- [Changelog](#changelog)
- [Contributors](#contributors)


### 🔦 Highlights

<!-- TODO -->

#### TAR Response Format on Gateways

Implemented [IPIP-288](https://github.com/ipfs/specs/pull/288), which adds
support for requesting a deserialized UnixFS directory as a TAR stream.

HTTP clients can request a TAR response by passing the `?format=tar` URL
parameter, or by setting the `Accept: application/x-tar` HTTP header:

```console
$ export DIR_CID=bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
$ curl -H "Accept: application/x-tar" "http://127.0.0.1:8080/ipfs/$DIR_CID" > dir.tar
$ curl "http://127.0.0.1:8080/ipfs/$DIR_CID?format=tar" | tar xv
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - alt.txt
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1 - transcript.txt
bafybeigccimv3zqm5g4jt363faybagywkvqbrismoquogimy7kvz2sj7sq/1 - Barrel - Part 1.png
```

### Changelog

<!-- TODO -->
Expand Down
2 changes: 1 addition & 1 deletion docs/examples/kubo-as-a-library/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ go 1.17
replace github.com/ipfs/kubo => ./../../..

require (
github.com/ipfs/go-ipfs-files v0.1.1
github.com/ipfs/go-ipfs-files v0.2.0
github.com/ipfs/interface-go-ipfs-core v0.7.0
github.com/ipfs/kubo v0.14.0-rc1
github.com/libp2p/go-libp2p v0.23.2
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/kubo-as-a-library/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -559,8 +559,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY
github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s=
github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4=
github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs=
github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324=
github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM=
github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8=
github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M=
github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU=
github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo=
github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY=
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ require (
github.com/ipfs/go-ipfs-cmds v0.8.1
github.com/ipfs/go-ipfs-exchange-interface v0.2.0
github.com/ipfs/go-ipfs-exchange-offline v0.3.0
github.com/ipfs/go-ipfs-files v0.1.1
github.com/ipfs/go-ipfs-files v0.2.0
github.com/ipfs/go-ipfs-keystore v0.0.2
github.com/ipfs/go-ipfs-pinner v0.2.1
github.com/ipfs/go-ipfs-posinfo v0.0.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,8 @@ github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uY
github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s=
github.com/ipfs/go-ipfs-files v0.0.3/go.mod h1:INEFm0LL2LWXBhNJ2PMIIb2w45hpXgPjNoE7yA8Y1d4=
github.com/ipfs/go-ipfs-files v0.0.8/go.mod h1:wiN/jSG8FKyk7N0WyctKSvq3ljIa2NNTiZB55kpTdOs=
github.com/ipfs/go-ipfs-files v0.1.1 h1:/MbEowmpLo9PJTEQk16m9rKzUHjeP4KRU9nWJyJO324=
github.com/ipfs/go-ipfs-files v0.1.1/go.mod h1:8xkIrMWH+Y5P7HvJ4Yc5XWwIW2e52dyXUiC0tZyjDbM=
github.com/ipfs/go-ipfs-files v0.2.0 h1:z6MCYHQSZpDWpUSK59Kf0ajP1fi4gLCf6fIulVsp8A8=
github.com/ipfs/go-ipfs-files v0.2.0/go.mod h1:vT7uaQfIsprKktzbTPLnIsd+NGw9ZbYwSq0g3N74u0M=
github.com/ipfs/go-ipfs-keystore v0.0.2 h1:Fa9xg9IFD1VbiZtrNLzsD0GuELVHUFXCWF64kCPfEXU=
github.com/ipfs/go-ipfs-keystore v0.0.2/go.mod h1:H49tRmibOEs7gLMgbOsjC4dqh1u5e0R/SWuc2ScfgSo=
github.com/ipfs/go-ipfs-pinner v0.2.1 h1:kw9hiqh2p8TatILYZ3WAfQQABby7SQARdrdA+5Z5QfY=
Expand Down
Binary file not shown.
Binary file not shown.
91 changes: 91 additions & 0 deletions test/sharness/t0122-gateway-tar.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/usr/bin/env bash
#
# Sharness test for TAR responses from the HTTP gateway, requested either
# with the ?format=tar URL parameter or an "Accept: application/x-tar" header.

test_description="Test HTTP Gateway TAR (application/x-tar) Support"

# Load the shared test helpers used throughout this script.
. lib/test-lib.sh

# Initialise a repo and start a daemon without network; every test below
# talks to its gateway on 127.0.0.1:$GWAY_PORT.
test_init_ipfs
test_launch_ipfs_daemon_without_network

# Root CIDs that the "ipfs dag import" output must contain when the
# outside-root.car and inside-root.car fixtures are imported further down.
OUTSIDE_ROOT_CID="bafybeicaj7kvxpcv4neaqzwhrqqmdstu4dhrwfpknrgebq6nzcecfucvyu"
INSIDE_ROOT_CID="bafybeibfevfxlvxp5vxobr5oapczpf7resxnleb7tkqmdorc4gl5cdva3y"

# Build a directory tree with confusingly named (/ipfs, /ipns, /api) and
# non-ASCII paths, add it to the node, and record the CIDs used by later tests:
#   DIR_CID   - root of the added directory
#   FILE_CID  - the UTF-8 named file nested inside it
#   FILE_SIZE - that file's size as reported by "ipfs files stat"
# Every command is joined with && so that a failure anywhere fails the test
# instead of being masked by the exit status of the final echo.
test_expect_success "Add the test directory" '
mkdir -p rootDir/ipfs &&
mkdir -p rootDir/ipns &&
mkdir -p rootDir/api &&
mkdir -p rootDir/ą/ę &&
echo "I am a txt file on path with utf8" > rootDir/ą/ę/file-źł.txt &&
echo "I am a txt file in confusing /api dir" > rootDir/api/file.txt &&
echo "I am a txt file in confusing /ipfs dir" > rootDir/ipfs/file.txt &&
echo "I am a txt file in confusing /ipns dir" > rootDir/ipns/file.txt &&
DIR_CID=$(ipfs add -Qr --cid-version 1 rootDir) &&
FILE_CID=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Hash) &&
FILE_SIZE=$(ipfs files stat --enc=json /ipfs/$DIR_CID/ą/ę/file-źł.txt | jq -r .Size) &&
echo "$FILE_CID / $FILE_SIZE"
'

# Request the single file as TAR via the URL parameter and pipe the response
# straight into tar for extraction.
test_expect_success "GET TAR with format=tar and extract" '
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x
'

# Same as above, but the format is selected with the Accept header.
test_expect_success "GET TAR with 'Accept: application/x-tar' and extract" '
curl -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" | tar -x
'

# Dump the response headers (-D -) and check Content-Disposition, the weak
# Etag carrying the ".x-tar" suffix, and Content-Type for ?format=tar.
test_expect_success "GET TAR with format=tar has expected Content-Type" '
curl -sD - "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" > curl_output_filename 2>&1 &&
test_should_contain "Content-Disposition: attachment;" curl_output_filename &&
test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename &&
test_should_contain "Content-Type: application/x-tar" curl_output_filename
'

# The same three header checks when the format comes from the Accept header.
test_expect_success "GET TAR with 'Accept: application/x-tar' has expected Content-Type" '
curl -sD - -H "Accept: application/x-tar" "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID" > curl_output_filename 2>&1 &&
test_should_contain "Content-Disposition: attachment;" curl_output_filename &&
test_should_contain "Etag: W/\"$FILE_CID.x-tar" curl_output_filename &&
test_should_contain "Content-Type: application/x-tar" curl_output_filename
'

# For a file CID the archive must extract to a regular file named after the
# CID, holding the original file content.
test_expect_success "GET TAR has expected root file" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$FILE_CID?format=tar" | tar -x -C outputDir &&
test -f "outputDir/$FILE_CID" &&
echo "I am a txt file on path with utf8" > expected &&
test_cmp expected outputDir/$FILE_CID
'

# For a directory CID the archive must extract to a directory named after the
# CID, with the nested UTF-8 path and its content intact.
test_expect_success "GET TAR has expected root directory" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?format=tar" | tar -x -C outputDir &&
test -d "outputDir/$DIR_CID" &&
echo "I am a txt file on path with utf8" > expected &&
test_cmp expected outputDir/$DIR_CID/ą/ę/file-źł.txt
'

# An explicit ?filename= must be reflected in Content-Disposition, both as an
# ASCII fallback (non-ASCII characters replaced by "_") and as a
# percent-encoded UTF-8 filename*. This body is double-quoted, so $GWAY_PORT
# and $DIR_CID are expanded when the test is defined rather than when it runs.
test_expect_success "GET TAR with explicit ?filename= succeeds with modified Content-Disposition header" "
curl -fo actual -D actual_headers 'http://127.0.0.1:$GWAY_PORT/ipfs/$DIR_CID?filename=testтест.tar&format=tar' &&
grep -F 'Content-Disposition: attachment; filename=\"test____.tar\"; filename*=UTF-8'\'\''test%D1%82%D0%B5%D1%81%D1%82.tar' actual_headers
"

# Import the two CAR fixtures and confirm that each import reports the
# expected root CID.
test_expect_success "Add CARs with relative paths to test with" '
ipfs dag import ../t0122-gateway-tar-data/outside-root.car > import_output &&
test_should_contain $OUTSIDE_ROOT_CID import_output &&
ipfs dag import ../t0122-gateway-tar-data/inside-root.car > import_output &&
test_should_contain $INSIDE_ROOT_CID import_output
'

# The response body is saved and must contain the error text below.
# NOTE(review): "now allowed" reads like a typo for "not allowed", but the
# assertion has to match whatever the server actually emits — confirm the
# upstream message before changing it here.
test_expect_success "GET TAR with relative paths outside root fails" '
curl -o - "http://127.0.0.1:$GWAY_PORT/ipfs/$OUTSIDE_ROOT_CID?format=tar" > curl_output_filename &&
test_should_contain "relative UnixFS paths outside the root are now allowed" curl_output_filename
'

# With the inside-root fixture the archive must extract and yield foobar/file
# under the directory named after the root CID.
test_expect_success "GET TAR with relative paths inside root works" '
rm -rf outputDir && mkdir outputDir &&
curl "http://127.0.0.1:$GWAY_PORT/ipfs/$INSIDE_ROOT_CID?format=tar" | tar -x -C outputDir &&
test -f outputDir/$INSIDE_ROOT_CID/foobar/file
'

# Stop the daemon started at the top of the script and finish the test run.
test_kill_ipfs_daemon

test_done