Skip to content

Commit

Permalink
Allow ampjs domain for serving amp js files.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 489206825
  • Loading branch information
amaltas authored and banaag committed Sep 1, 2023
1 parent e8da9e3 commit 81cece3
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 37 deletions.
98 changes: 61 additions & 37 deletions validator/cpp/engine/validator-internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -311,42 +311,62 @@ ScriptTag ParseScriptTag(htmlparser::Node* node) {
}
}

if (src.empty()) {
return script_tag;
}

std::string src_str{src};
// Determine if this has a valid AMP domain and separate the path from the
// attribute 'src'. Consumes the domain making src just the path.
if (absl::ConsumePrefix(&src, kAmpProjectDomain)) {
script_tag.is_amp_domain = true;
script_tag.path = std::string(src);

// Only look at script tags that have attribute 'async'.
if (has_async_attr) {
// Determine if this is the AMP Runtime.
if (!script_tag.is_extension &&
RE2::FullMatch(src, *kRuntimeScriptPathRe)) {
script_tag.is_runtime = true;
script_tag.has_valid_path = true;
}

// For AMP Extensions, validate path and extract name and version.
if (script_tag.is_extension &&
RE2::FullMatch(src, *kExtensionPathRe, &script_tag.extension_name,
&script_tag.extension_version)) {
script_tag.has_valid_path = true;
}

// Determine the release version (LTS, module, standard, etc).
if ((has_module_attr && RE2::FullMatch(src, *kModuleLtsScriptPathRe)) ||
(has_nomodule_attr && RE2::FullMatch(src, *kLtsScriptPathRe))) {
script_tag.release_version = ScriptReleaseVersion::MODULE_NOMODULE_LTS;
} else if ((has_module_attr &&
RE2::FullMatch(src, *kModuleScriptPathRe)) ||
(has_nomodule_attr &&
RE2::FullMatch(src, *kStandardScriptPathRe))) {
script_tag.release_version = ScriptReleaseVersion::MODULE_NOMODULE;
} else if (RE2::FullMatch(src, *kLtsScriptPathRe)) {
script_tag.release_version = ScriptReleaseVersion::LTS;
} else if (RE2::FullMatch(src, *kStandardScriptPathRe)) {
script_tag.release_version = ScriptReleaseVersion::STANDARD;
}
script_tag.path = src_str;
} else {
script_tag.is_amp_domain = false;
htmlparser::URL url(src_str);
// Error cases, early exit:
if (!url.is_valid()) return script_tag;
if (!url.has_protocol()) return script_tag;
if (url.protocol() != "https" && url.protocol() != "http")
return script_tag;
if (url.hostname().empty()) return script_tag;

src = url.path_params_fragment().data();
// Trim the "/" prefix as this is what kExtensionPathRe expects.
if (!src.empty() && src[0] == '/') src = src.substr(1);
std::string src_str{src};
script_tag.path = src_str;
}

// Only look at script tags that have attribute 'async'.
if (has_async_attr) {
// Determine if this is the AMP Runtime.
if (!script_tag.is_extension &&
RE2::FullMatch(src, *kRuntimeScriptPathRe)) {
script_tag.is_runtime = true;
script_tag.has_valid_path = true;
}

// For AMP Extensions, validate path and extract name and version.
if (script_tag.is_extension &&
RE2::FullMatch(src, *kExtensionPathRe, &script_tag.extension_name,
&script_tag.extension_version)) {
script_tag.has_valid_path = true;
}

// Determine the release version (LTS, module, standard, etc).
if ((has_module_attr && RE2::FullMatch(src, *kModuleLtsScriptPathRe)) ||
(has_nomodule_attr && RE2::FullMatch(src, *kLtsScriptPathRe))) {
script_tag.release_version = ScriptReleaseVersion::MODULE_NOMODULE_LTS;
} else if ((has_module_attr &&
RE2::FullMatch(src, *kModuleScriptPathRe)) ||
(has_nomodule_attr &&
RE2::FullMatch(src, *kStandardScriptPathRe))) {
script_tag.release_version = ScriptReleaseVersion::MODULE_NOMODULE;
} else if (RE2::FullMatch(src, *kLtsScriptPathRe)) {
script_tag.release_version = ScriptReleaseVersion::LTS;
} else if (RE2::FullMatch(src, *kStandardScriptPathRe)) {
script_tag.release_version = ScriptReleaseVersion::STANDARD;
}
}
return script_tag;
Expand Down Expand Up @@ -3976,8 +3996,14 @@ void ValidateAmpScriptSrcAttr(const ParsedHtmlTag& tag,
const TagSpec& tag_spec, const Context& context,
ValidationResult* result) {
if (!tag.IsAmpDomain()) {
context.AddError(ValidationError::DISALLOWED_AMP_DOMAIN, context.line_col(),
/*params=*/{}, /*spec_url=*/"", result);
bool is_amp_format =
c_find(context.type_identifiers(), TypeIdentifier::kAmp) !=
context.type_identifiers().end();
if (!is_amp_format || context.is_transformed()) {
context.AddError(ValidationError::DISALLOWED_AMP_DOMAIN,
context.line_col(),
/*params=*/{}, /*spec_url=*/"", result);
}
}

if (tag.IsExtensionScript() && tag_spec.has_extension_spec()) {
Expand Down Expand Up @@ -5660,9 +5686,7 @@ class ParsedValidatorRulesProvider {
class Validator {
public:
Validator(const ParsedValidatorRules* rules, int max_errors = -1)
: rules_(rules),
max_errors_(max_errors),
context_(rules_, max_errors_) {}
: rules_(rules), max_errors_(max_errors), context_(rules_, max_errors_) {}

ValidationResult Validate(const htmlparser::Document& doc) {
doc_metadata_ = doc.Metadata();
Expand Down
3 changes: 3 additions & 0 deletions validator/cpp/htmlparser/url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,9 @@ void URL::ParseAuthority() {
}
idx++;
}
// Save off whatever is left as the path + params + fragment.
// We don't further parse this.
path_params_fragment_ = url_.substr(idx);

// Extract the login string if one was found.
if (at_idx != -1) {
Expand Down
4 changes: 4 additions & 0 deletions validator/cpp/htmlparser/url.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class URL {
std::string hostname() const { return host_; }
std::string login() const { return login_; }
int port() const { return port_; }
std::string_view path_params_fragment() const {
return path_params_fragment_;
}

private:
static bool IsAlphaNum(uint8_t c) {
Expand Down Expand Up @@ -79,6 +82,7 @@ class URL {
std::string login_;
std::string host_;
int port_;
std::string_view path_params_fragment_;
};

} // namespace htmlparser
Expand Down
4 changes: 4 additions & 0 deletions validator/cpp/htmlparser/url_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ TEST(URLTest, BasicTests) {
EXPECT_EQ(url.protocol(), "http");
EXPECT_EQ(url.hostname(), "www.google.com");
EXPECT_EQ(url.port(), 80);
EXPECT_EQ(url.path_params_fragment(), "/");

URL https_url("https://www.google.com/");
EXPECT_TRUE(https_url.has_protocol());
Expand Down Expand Up @@ -91,9 +92,11 @@ TEST(URLTest, BasicTests) {
EXPECT_EQ(port_url.protocol(), "http");
EXPECT_EQ(port_url.hostname(), "www.google.com");
EXPECT_EQ(port_url.port(), 8080);
EXPECT_EQ(port_url.path_params_fragment(), "/");

URL port_url2("http://www.google.com:0080/foo:8080");
EXPECT_EQ(port_url2.port(), 80);
EXPECT_EQ(port_url2.path_params_fragment(), "/foo:8080");

// Invalid port.
URL invalid_port("http://www.google.com:99999/foo");
Expand All @@ -111,6 +114,7 @@ TEST(URLTest, BasicTests) {
URL ipv6("http://[::2]/foo");
EXPECT_TRUE(ipv6.is_valid());
EXPECT_EQ(ipv6.hostname(), "::2");
EXPECT_EQ(ipv6.path_params_fragment(), "/foo");

URL ipv62("http://foo:bar@[1:2:3::4]:8080/foo");
EXPECT_TRUE(ipv62.is_valid());
Expand Down

0 comments on commit 81cece3

Please sign in to comment.