Skip to content

Commit

Permalink
upgrade html5ever to 0.26.0
Browse files: browse the repository at this point in the history
  • Loading branch information
snshn committed Jan 14, 2024
1 parent f7dd09d commit f7e5527
Show file tree
Hide file tree
Showing 9 changed files with 233 additions and 257 deletions.
325 changes: 101 additions & 224 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ chrono = "0.4.31" # Used for formatting creation timestamp
clap = "3.2.25" # Used for processing CLI arguments
cssparser = "0.33.0" # Used for dealing with CSS
encoding_rs = "0.8.33" # Used for parsing and converting document charsets
html5ever = "0.24.1" # Used for all things DOM
html5ever = "0.26.0" # Used for all things DOM
markup5ever_rcdom = "0.2.0"
percent-encoding = "2.3.1"
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
url = "2.5.0"
Expand Down
40 changes: 28 additions & 12 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use chrono::prelude::*;
use encoding_rs::Encoding;
use html5ever::interface::QualName;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::{format_tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns, LocalName};
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
use regex::Regex;
use reqwest::blocking::Client;
use reqwest::Url;
Expand All @@ -32,8 +32,12 @@ const ICON_VALUES: &'static [&str] = &["icon", "shortcut icon"];

pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, document, SerializeOpts::default())
.expect("unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("unable to serialize DOM into buffer");

let mut dom = html_to_dom(&buf, "utf-8".to_string());
let doc = dom.get_document();
Expand Down Expand Up @@ -428,8 +432,12 @@ pub fn is_icon(attr_value: &str) -> bool {

pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, document, SerializeOpts::default())
.expect("unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("unable to serialize DOM into buffer");

let mut dom = html_to_dom(&buf, "utf-8".to_string());
let doc = dom.get_document();
Expand Down Expand Up @@ -534,7 +542,7 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)

pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec<u8> {
let mut buf: Vec<u8> = Vec::new();
let doc = dom.get_document();
let document = dom.get_document();

if options.isolate
|| options.no_css
Expand All @@ -544,7 +552,7 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
|| options.no_images
{
// Take care of CSP
if let Some(html) = get_child_node_by_name(&doc, "html") {
if let Some(html) = get_child_node_by_name(&document, "html") {
if let Some(head) = get_child_node_by_name(&html, "head") {
let meta = dom.create_element(
QualName::new(None, ns!(), local_name!("meta")),
Expand All @@ -570,8 +578,12 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
}
}

serialize(&mut buf, &doc, SerializeOpts::default())
.expect("Unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("Unable to serialize DOM into buffer");

// Unwrap NOSCRIPT elements
if options.unwrap_noscript {
Expand Down Expand Up @@ -660,7 +672,7 @@ pub fn retrieve_and_embed_asset(
let mut frame_data: Vec<u8> = Vec::new();
serialize(
&mut frame_data,
&frame_dom.document,
&SerializableHandle::from(frame_dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
Expand Down Expand Up @@ -1198,8 +1210,12 @@ pub fn walk_and_embed_assets(
{
if let Some(body) = get_child_node_by_name(&html, "body") {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &body, SerializeOpts::default())
.expect("Unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(body.clone()),
SerializeOpts::default(),
)
.expect("Unable to serialize DOM into buffer");
let result = String::from_utf8_lossy(&buf);
noscript_contents.push_slice(&result);
}
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use encoding_rs::Encoding;
use html5ever::rcdom::RcDom;
use markup5ever_rcdom::RcDom;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
Expand Down
8 changes: 7 additions & 1 deletion tests/html/add_favicon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;

use monolith::html;

Expand All @@ -19,7 +20,12 @@ mod passing {
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down
2 changes: 1 addition & 1 deletion tests/html/get_node_attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};

use monolith::html;

Expand Down
2 changes: 1 addition & 1 deletion tests/html/get_node_name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};

use monolith::html;

Expand Down
2 changes: 1 addition & 1 deletion tests/html/set_node_attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};

use monolith::html;

Expand Down
106 changes: 91 additions & 15 deletions tests/html/walk_and_embed_assets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
Expand All @@ -32,7 +33,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand All @@ -55,7 +61,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand All @@ -78,7 +89,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -107,7 +123,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -143,7 +164,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -180,7 +206,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand All @@ -204,7 +235,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -236,7 +272,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -272,7 +313,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -306,7 +352,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -343,7 +394,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -381,7 +437,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -427,7 +488,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -466,7 +532,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down Expand Up @@ -501,7 +572,12 @@ mod passing {
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();

assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
Expand Down

0 comments on commit f7e5527

Please sign in to comment.