From 93e567db7b4b0ce989c1f0234ab364e5a7f7041f Mon Sep 17 00:00:00 2001 From: Mathias Lafeldt Date: Mon, 16 Aug 2021 21:37:52 +0200 Subject: [PATCH] Start reimplementing get-strip --- Cargo.lock | 430 ++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 7 + get-strip/lambda.rs | 70 ++++++++ 3 files changed, 507 insertions(+) create mode 100644 get-strip/lambda.rs diff --git a/Cargo.lock b/Cargo.lock index 762eb74..f88b1a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,21 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.2.1" @@ -101,6 +116,12 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631" +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + [[package]] name = "bytes" version = "1.0.1" @@ -169,6 +190,12 @@ dependencies = [ "serde", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "core-foundation" version = "0.9.1" @@ -205,6 +232,33 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "cssparser" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "matches", + "phf", + "proc-macro2", + "quote", + "smallvec", + "syn", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "deadpool" version = "0.7.0" @@ -219,14 +273,30 @@ dependencies = [ "tokio", ] +[[package]] +name = "derive_more" +version = "0.99.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40eebddd2156ce1bb37b20bbe5151340a31828b1f2d22ba4141f3531710e38df" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "dilbert-feed" version = "0.1.0" dependencies = [ + "chrono", "lambda_runtime", "log", "openssl", "reqwest", + "scraper", + "select", "serde", "serde_json", "simple_logger", @@ -234,6 +304,27 @@ dependencies = [ "wiremock", ] +[[package]] +name = "dtoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56899898ce76aaf4a0f24d914c97ea6ed976d42fec6ad33fcbb0a1103e07b2b0" + +[[package]] +name = "dtoa-short" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bde03329ae10e79ede66c9ce4dc930aa8599043b0743008548680f25b91502d6" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a68a4904193147e0a8dec3314640e6db742afd5f6e634f428a6af230d9b3591" + [[package]] name = "encoding_rs" version = "0.8.28" @@ -289,6 +380,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9c1ce3fa9336301af935ab852c437817d14cd33690446569392e65170aac3b" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.16" @@ -404,6 +505,24 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -460,6 +579,20 @@ dependencies = [ "libc", ] +[[package]] +name = "html5ever" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafcf38a1a36118242d29b92e1b08ef84e67e4a5ed06e0a80be20e6a32bfed6b" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.4" @@ -663,6 +796,38 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a24f40fb03852d1cdd84330cddcaf98e9ec08a7b7768e952fad3b4cf048ec8fd" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f015da43bcd8d4f144559a3423f4591d69b8ce0652c905374da7205df336ae2b" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "matches" version = "0.1.8" @@ -721,6 +886,18 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "5.1.2" @@ -831,6 +1008,69 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "phf" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12" +dependencies = [ + "phf_macros", + "phf_shared", + "proc-macro-hack", +] + +[[package]] +name = "phf_codegen" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbffee61585b0411840d3ece935cce9cb6321f01c45477d30066498cd5e1a815" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526" +dependencies = [ + "phf_shared", + "rand 0.7.3", +] + +[[package]] +name = "phf_macros" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project-lite" version = "0.2.7" @@ -855,6 +1095,12 @@ version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro-hack" version = "0.5.19" @@ -896,6 +1142,7 @@ dependencies = [ "rand_chacha 0.2.2", "rand_core 0.5.1", "rand_hc 0.2.0", + "rand_pcg", ] [[package]] @@ -966,6 +1213,15 @@ dependencies = [ "rand_core 0.6.3", ] +[[package]] +name = "rand_pcg" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429" +dependencies = [ + "rand_core 0.5.1", +] + [[package]] name = "redox_syscall" version = "0.2.10" @@ -1035,6 +1291,15 @@ dependencies = [ "winreg", ] +[[package]] +name = "rustc_version" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dfe2087c51c460008730de8b57e6a320782fbfb312e1f4d520e6c6fae155ee" +dependencies = [ + "semver", +] + [[package]] name = "ryu" version = "1.0.5" @@ -1051,6 +1316,22 @@ dependencies = [ "winapi", ] +[[package]] +name = "scraper" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e02aa790c80c2e494130dec6a522033b6a23603ffc06360e9fe6c611ea2c12" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "matches", + "selectors", + "smallvec", + "tendril", +] + [[package]] name = "security-framework" version = "2.3.1" @@ -1074,6 +1355,55 @@ dependencies = [ "libc", ] +[[package]] +name = "select" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee061f90afcc8678bef7a78d0d121683f0ba753f740ff7005f833ec445876b7" +dependencies = [ + "bit-set", + "html5ever", + "markup5ever_rcdom", +] + +[[package]] +name = "selectors" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df320f1889ac4ba6bc0cdc9c9af7af4bd64bb927bccdf32d81140dc1f9be12fe" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "matches", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", + "thin-slice", +] + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + [[package]] name = "serde" version = "1.0.127" @@ -1128,6 +1458,16 @@ dependencies = [ "serde", ] +[[package]] +name = "servo_arc" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d98238b800e0d1576d8b6e3de32827c2d74bee68bb97748dcf5071fb53965432" +dependencies = [ + "nodrop", + "stable_deref_trait", +] + [[package]] name = "sharded-slab" version = "0.1.3" @@ -1150,12 +1490,24 @@ dependencies = [ "winapi", ] +[[package]] +name = "siphasher" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1" + [[package]] name = "slab" version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c307a32c1c5c437f38c7fd45d753050587732ba8628319fbdf12a7e289ccc590" +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + [[package]] name = "socket2" version = "0.4.1" @@ -1166,12 +1518,43 @@ dependencies = [ "winapi", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "string_cache" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ddb1139b5353f96e429e1a5e19fbaf663bddedaa06d1dbd49f82e352601209a" +dependencies = [ + "lazy_static", + "new_debug_unreachable", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f24c8e5e19d22a726626f1a5e16fe15b132dcf21d10177fa5a45ce7962996b97" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "syn" version = "1.0.74" @@ -1197,6 +1580,23 @@ dependencies = [ "winapi", ] +[[package]] +name = "tendril" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9ef557cb397a4f0a5a3a628f06515f78563f2209e64d47055d9dc6052bf5e33" +dependencies = [ + "futf", + "mac", + "utf-8", +] + +[[package]] +name = "thin-slice" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" + [[package]] name = "thiserror" version = "1.0.26" @@ -1381,6 +1781,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + [[package]] name = "unicode-bidi" version = "0.3.5" @@ -1399,6 +1805,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + [[package]] name = "unicode-xid" version = "0.2.2" @@ -1418,6 +1830,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1586,3 +2004,15 @@ dependencies = [ "serde_json", "tokio", ] + +[[package]] +name = "xml5ever" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b1b52e6e8614d4a58b8e70cf51ec0cc21b256ad8206708bcff8139b5bbd6a59" +dependencies = [ + "log", + "mac", + "markup5ever", + "time", +] diff --git a/Cargo.toml b/Cargo.toml index c9bfdb3..b912e92 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,11 +12,18 @@ edition = "2018" name = "heartbeat" path = "heartbeat/lambda.rs" +[[bin]] +name = "get-strip" +path = "get-strip/lambda.rs" + [dependencies] +chrono = "0.4" lambda_runtime = "0.4" log = "0.4" openssl = { version = "0.10", features = ["vendored"] } reqwest = "0.11" +select = "0.5" +scraper = "0.12" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" simple_logger = "1.13" diff --git a/get-strip/lambda.rs b/get-strip/lambda.rs new file mode 100644 index 0000000..1c08516 --- /dev/null +++ b/get-strip/lambda.rs @@ -0,0 +1,70 @@ +use chrono::Datelike; +use lambda_runtime::{handler_fn, Context, Error}; +use log::{debug, info}; +use select::document::Document; +use select::predicate::Class; +use serde::{Deserialize, Serialize}; +// use serde_json::Value; +// use std::env; + +#[derive(Deserialize, Debug)] +struct Input { + date: Option, +} + +#[derive(Serialize, PartialEq, Debug)] +struct Output { + date: String, + title: String, + image_url: String, + strip_url: String, + + upload_url: String, +} + +#[tokio::main] +async fn main() -> Result<(), Error> { + simple_logger::init_with_env()?; + // lambda_runtime::run(handler_fn(handler)).await?; + info!( + "{:?}", + handler( + Input { + date: Some("2000-07-15".to_string()), + // date: None, + }, + Context::default(), + ) + .await? + ); + Ok(()) +} + +async fn handler(input: Input, _: Context) -> Result { + debug!("Got input: {:?}", input); + + let date = input.date.unwrap_or_else(|| { + let now = chrono::Utc::now(); + format!("{}-{:02}-{:02}", now.year(), now.month(), now.day()) + }); + + let base_url = "https://dilbert.com"; + let strip_url = format!("{}/strip/{}", base_url, date); + + let resp = reqwest::get(&strip_url).await?.error_for_status()?; + let body = resp.text().await?; + let document = Document::from(body.as_ref()); + let container = document.find(Class("comic-item-container")).next().unwrap(); + + info!("{}", container.attr("data-id").unwrap()); + let title = container.attr("data-title").unwrap(); + let image_url = container.attr("data-image").unwrap(); + + Ok(Output { + date: date, + title: title.to_string(), + image_url: image_url.to_string(), + strip_url: strip_url, + upload_url: "".to_string(), + }) +}