-
-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement path validations #1740
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,3 +28,27 @@ func TestPathParsing(t *testing.T) { | |
} | ||
} | ||
} | ||
|
||
func TestPathCleaning(t *testing.T) { | ||
cases := map[string]string{ | ||
// .. should strip the preceding path segment | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'm not in favor of special casing the names of links like this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the same logic as POSIX pathname resolution and relative URI resolution. User-specified objects named There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe this should be pushed to ipfs/go-ipld? — On Mon, Sep 21, 2015 at 6:16 PM, Will Glynn [email protected]
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can see similar code being useful in go-ipld, but I think This is required for URIs – even absolute URIs – so web servers including Go's
Why should string There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed @willglynn -- and agreed with plan9 folks:
So this basically means that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @whyrusleeping what reservations do you have re |
||
"/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/../b": "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/b", | ||
|
||
// . should be interpreted as "here" | ||
"/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/./b": "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/b", | ||
|
||
// repeated slashes should collapse to one slash | ||
"/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a////b": "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/b", | ||
|
||
// bare hashes should be interpreted as IPFS paths | ||
"QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n": "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n", | ||
} | ||
|
||
for p, expected := range cases { | ||
if parsed, err := ParsePath(p); err != nil { | ||
t.Fatalf("path %q failed to parse: %v", p, err) | ||
} else if parsed.String() != expected { | ||
t.Fatalf("expected %q to parse to %q, got %q", p, expected, parsed.String()) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package path | ||
|
||
import ( | ||
"errors" | ||
"unicode/utf8" | ||
) | ||
|
||
var ErrPathSegmentEmpty = errors.New("path segment is empty") | ||
var ErrPathSegmentTooLong = errors.New("path segment is too long") | ||
var ErrInvalidUTF8 = errors.New("path is not valid UTF-8") | ||
var ErrRestrictedCharacter = errors.New("path contains a restricted character") | ||
var ErrRestrictedString = errors.New("path is a restricted string") | ||
|
||
const ( | ||
MaximumPathSegmentLength = 255 // matching Linux NAME_MAX | ||
) | ||
|
||
func ValidateSegmentBytes(segment []byte) error { | ||
// check the path segment's length first | ||
if len(segment) == 0 { | ||
return ErrPathSegmentEmpty | ||
} | ||
|
||
// check if it's "." or "..", both of which are not permitted | ||
if (len(segment) == 1 && segment[0] == 0x2e) || (len(segment) == 2 && segment[0] == 0x2e && segment[1] == 0x2e) { | ||
return ErrRestrictedString | ||
} | ||
|
||
// succesively decode UTF-8 codepoints | ||
codepoints := 0 | ||
for i := 0; i < len(segment); { | ||
decoded, decodeLen := utf8.DecodeRune(segment[i:]) | ||
|
||
// did we decode a valid UTF-8 codepoint? | ||
if decoded == utf8.RuneError && decodeLen <= 1 { | ||
// no; segment is invalid UTF-8 | ||
return ErrInvalidUTF8 | ||
} else { | ||
// yes, we decoded one codepoint occupying decodeLen bytes | ||
i += decodeLen | ||
codepoints += 1 | ||
} | ||
|
||
// check decoded rune against character exclusions | ||
if (decoded >= 0 && decoded <= 0x1f) || decoded == 0x7f { | ||
// control character | ||
return ErrRestrictedCharacter | ||
} else if decoded == '/' { | ||
// slash | ||
return ErrRestrictedCharacter | ||
} | ||
|
||
// this character is permitted | ||
// do we have too many codepoints? | ||
if codepoints > MaximumPathSegmentLength { | ||
return ErrPathSegmentTooLong | ||
} | ||
} | ||
|
||
// no violations found | ||
return nil | ||
} | ||
|
||
func ValidateSegment(segment string) error { | ||
return ValidateSegmentBytes([]byte(segment)) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,235 @@ | ||
package path | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
) | ||
|
||
// these test vectors are chosen as examples of IPFS path validation rules | ||
// these vectors are not themselves the definition of a valid path | ||
|
||
// these strings are not valid path segments, but are valid given a prefix or suffix, or when used as part of a larger path | ||
var invalidPathSegments = []string{ | ||
// empty string | ||
"", | ||
|
||
// special meaning in POSIX, and cleaned away by path.ParsePath() | ||
".", | ||
"..", | ||
} | ||
|
||
// these substrings are not valid in a path segment, but *are* valid in paths | ||
var invalidPathSegmentSubstrings = []string{ | ||
// slash is not permitted in path segments, since it delimits path segments | ||
// it is (obviously) valid in paths | ||
"/", | ||
} | ||
|
||
// these substrings are never valid in a path anywhere, even when prefixed/suffixed with valid UTF-8 | ||
var invalidPathSubstrings = []string{ | ||
// ASCII/Unicode C0 control characters are not permitted | ||
"\u0000", "\u0001", "\u0002", "\u0003", "\u0004", "\u0005", "\u0006", "\u0007", | ||
"\u0008", "\u0009", "\u000a", "\u000b", "\u000c", "\u000d", "\u000e", "\u000f", | ||
"\u0010", "\u0011", "\u0012", "\u0013", "\u0014", "\u0015", "\u0016", "\u0017", | ||
"\u0018", "\u0019", "\u001a", "\u001b", "\u001c", "\u001d", "\u001e", "\u001f", | ||
"\u007f", | ||
|
||
// bytes which are illegal in UTF-8 are not permitted | ||
"\xfe", "\xff", | ||
"\xfe\xff", // the UTF-16 BOM | ||
"\xff\xfe", // test a reversed UTF-16 BOM because why not | ||
|
||
// malformed UTF-8 sequences are not permitted | ||
"\x80", | ||
// TODO: more | ||
|
||
// overlong UTF-8 sequences are not permitted | ||
"\xc0\x80", | ||
"\xe0\x80\x80", | ||
"\xf0\x80\x80\x80", | ||
|
||
// 4-byte UTF-8 sequences above plane 16 are not permitted | ||
"\xf4\x90\x80\x80", // U+110000 | ||
"\xf7\xbf\xbf\xbf", // U+1FFFFF | ||
|
||
// 5- and 6-byte UTF-8 sequences are not permitted | ||
"\xf8\x88\x80\x80\x80", // U+200000 | ||
"\xfb\xbf\xbf\xbf\xbf", // U+3FFFFF | ||
"\xfc\x84\x80\x80\x80\x80", // U+400000 | ||
"\xfd\xbf\xbf\xbf\xbf\xbf", // U+7FFFFF | ||
|
||
// surrogate halves/pairs are not permitted | ||
"\xed\xa0\x80", // U+D800 | ||
"\xed\xaf\xbf", // U+DBFF | ||
"\xed\xb0\x80", // U+DC00 | ||
"\xed\xbf\xbf", // U+DFFF | ||
|
||
"\xed\xa0\x80\xed\xb0\x80", // U+D800 U+DC00 | ||
|
||
// path segments past NAME_MAX bytes: | ||
strings.Repeat("a", 256), | ||
|
||
// path segments past NAME_MAX characters: | ||
strings.Repeat("á", 256), | ||
} | ||
|
||
// these strings are valid path segments | ||
var validPathSegments = []string{ | ||
// path segments just under NAME_MAX bytes: | ||
strings.Repeat("a", 255), | ||
|
||
// path segments just under NAME_MAX characters: | ||
strings.Repeat("á", 255), | ||
} | ||
|
||
// these strings *are* valid path segments, even when prefixed/suffixed | ||
var validPathSubstrings = []string{ | ||
// basic strings are fine | ||
"true", | ||
"1", | ||
"ipfs", | ||
"I P F S", | ||
"IPFS ", | ||
"autorun.inf", | ||
".DS_Store", | ||
"Program Files", | ||
"Progra~1", | ||
|
||
// dots are fine, as long as it's not "." and ".." verbatim | ||
"foo.", | ||
".foo", | ||
"..bar", | ||
"...", | ||
|
||
// Unicode strings are fine | ||
// (Go source files -- and by extension Go string literals -- are UTF-8, just like IPFS paths) | ||
`currencies $€£¥₹₪`, | ||
`…sigh…`, | ||
`“smart quotes”`, | ||
`emoji 😘`, | ||
`ἄλφα βῆτα`, | ||
`АВГД`, | ||
`ᎠᎡᎢᎣ`, | ||
`z̮ͮ̉ͪͥ͛ͦ̌a̾̈̐̎̿̈l̙̺̮ͭͭ̄̂ͤ̈ͅg̶̺̊͋͒͒ͧő̫̹̟͛̓ͭ`, // "zalgo" | ||
|
||
// 2-byte, 3-byte, and 4-byte UTF-8 sequences are okay up to U+10FFFF | ||
"\xc2\x80", | ||
"\xe0\xa0\x80", | ||
"\xf0\x90\x80\x80", | ||
"\xf4\x8f\xbf\xbf", | ||
|
||
// things that are fine in IPFS that might be less fine in other contexts | ||
"\\", // just a backslash | ||
"foo:bar", // problematic on Windows and ~OSX | ||
" ", // look ma, all whitespace | ||
"COM1", // problematic on Windows | ||
"C:\\", // very very problematic on Windows | ||
|
||
// all Unicode codepoints are permitted except those that are specifically excluded, so these things are fine too | ||
"\u00a0", // non-breaking space | ||
"\u0156", // unicode C1 string terminator | ||
"\u0200", // non-breaking space | ||
"\u0300", // combining diacritical mark without anything on which to combine | ||
"\u200d", // zero width joiner (invisible) | ||
"\u202e", // right-to-left override; https://xkcd.com/1137/ | ||
"\ufdd0", // not a character; https://xkcd.com/380/ | ||
"\ufdef", // not a character | ||
"\ufeff", // byte order mark | ||
"\uf8ff", // private use character | ||
"\ufffd", // Unicode replacement character | ||
"\ufffe", // not a character, and sometimes mistakable for a reversed BOM | ||
"\uffff", // not a character, and sometimes mistakable for an EOF | ||
".gi\u200ct", // CVE-2014-9390 | ||
} | ||
|
||
// given a string slice, return a bigger string slice containing prefixes and suffixes | ||
func pathSegmentVariations(segments []string) []string { | ||
out := make([]string, 0, len(segments)*4) | ||
|
||
for _, segment := range segments { | ||
out = append(out, segment) | ||
out = append(out, "foo"+segment) | ||
out = append(out, segment+"bar") | ||
out = append(out, "baz"+segment+"quxx") | ||
} | ||
|
||
return out | ||
} | ||
|
||
func TestValidPathSegments(t *testing.T) { | ||
pathSegments := append( | ||
validPathSegments, | ||
pathSegmentVariations(validPathSubstrings)..., | ||
) | ||
|
||
for _, segment := range pathSegments { | ||
if err := ValidateSegment(segment); err != nil { | ||
t.Fatalf("expected string segment %q to be valid, got %q", segment, err) | ||
} | ||
|
||
if err := ValidateSegmentBytes([]byte(segment)); err != nil { | ||
t.Fatalf("expected []byte segment %q to be valid, got %q", segment, err) | ||
} | ||
|
||
if _, err := ParsePath("/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/" + segment); err != nil { | ||
t.Fatalf("expected Path based on %q to be valid, got %q", segment, err) | ||
} | ||
} | ||
} | ||
|
||
func TestInvalidPathSegments(t *testing.T) { | ||
// invalidPathSegments are only invalid as path segments, but are valid in paths | ||
// they *are* valid when passed through pathSegmentVariations() to gain a prefix/suffix, and | ||
// they *are* valid as a Path() | ||
for _, segment := range invalidPathSegments { | ||
// invalid as a path segment, whether bytes or string | ||
if err := ValidateSegment(segment); err == nil { | ||
t.Fatalf("expected string segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
if err := ValidateSegmentBytes([]byte(segment)); err == nil { | ||
t.Fatalf("expected []byte segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
// valid as a Path | ||
if _, err := ParsePath("/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/" + segment); err != nil { | ||
t.Fatalf("expected Path based on %q to be valid, got %q", segment, err) | ||
} | ||
} | ||
} | ||
|
||
func TestInvalidPathSegmentSubstrings(t *testing.T) { | ||
// invalidPathSegmentSubstrings are invalid as path segments, whether prefixed or not, but *are* valid in paths | ||
for _, segment := range pathSegmentVariations(invalidPathSegmentSubstrings) { | ||
// invalid as a path segment, whether bytes or string | ||
if err := ValidateSegment(segment); err == nil { | ||
t.Fatalf("expected string segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
if err := ValidateSegmentBytes([]byte(segment)); err == nil { | ||
t.Fatalf("expected []byte segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
if _, err := ParsePath("/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/" + segment); err != nil { | ||
t.Fatalf("expected Path based on %q to be valid, got %q", segment, err) | ||
} | ||
} | ||
} | ||
|
||
func TestInvalidPathSubstrings(t *testing.T) { | ||
// invalidPathSubstrings are invalid when appearing anywhere Path-related | ||
|
||
for _, segment := range pathSegmentVariations(invalidPathSubstrings) { | ||
if err := ValidateSegment(segment); err == nil { | ||
t.Fatalf("expected string segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
if err := ValidateSegmentBytes([]byte(segment)); err == nil { | ||
t.Fatalf("expected []byte segment %q to be invalid, but it was valid", segment) | ||
} | ||
|
||
if _, err := ParsePath("/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a/" + segment); err == nil { | ||
t.Fatalf("expected Path based on %q to be invalid, but it was valid", segment) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
maybe remove this commented out stuff, as it is part of
ParsePath
?