chore(website): fix background spawn
j-mendez committed Jan 28, 2025
1 parent 08235ec commit b010eae
Showing 2 changed files with 28 additions and 30 deletions.
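Note on the fix: background tasks were previously spawned with spider::tokio::spawn, which requires an ambient Tokio runtime on the calling thread and panics without one; when Python calls into the extension synchronously there is no such runtime, which is presumably the background-spawn bug named in the commit title. The commit instead fetches the runtime that pyo3-async-runtimes manages and spawns on its handle, which works from any thread. A minimal sketch of the pattern, assuming pyo3-async-runtimes as a dependency (the function name background_crawl is illustrative, not from this repository):

    use tokio::task::JoinHandle;

    fn background_crawl() -> JoinHandle<()> {
        // A &'static tokio::runtime::Runtime managed by pyo3-async-runtimes.
        let rt = pyo3_async_runtimes::tokio::get_runtime();

        // Unlike tokio::spawn, Runtime::spawn does not panic when called
        // outside of a runtime context, e.g. from a plain Python thread.
        rt.spawn(async move {
            // long-running work, e.g. website.crawl().await in this crate
        })
    }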
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 edition = "2021"
 name = "spider_rs"
-version = "0.0.56"
+version = "0.0.57"
 repository = "https://github.com/spider-rs/spider-py"
 license = "MIT"
 description = "The fastest web crawler and indexer."
56 changes: 27 additions & 29 deletions src/website.rs
@@ -163,8 +163,9 @@ impl Website {
         let mut rx2 = website
             .subscribe(*BUFFER / 2)
             .expect("sync feature should be enabled");
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

-        let handle = spider::tokio::spawn(async move {
+        let handle = rt.spawn(async move {
             while let Ok(res) = rx2.recv().await {
                 let page = new_page(&res, raw_content);
                 Python::with_gil(|py| {
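The subscription hunks in this file all share one shape: a broadcast receiver is drained on the runtime, and each received page is handed to Python while briefly holding the GIL. A hedged sketch of that loop, with the types reduced to tokio and pyo3 primitives (spawn_subscriber and the String payload are illustrative stand-ins for the crate's page type):

    use pyo3::prelude::*;
    use tokio::sync::broadcast;

    fn spawn_subscriber(mut rx: broadcast::Receiver<String>, on_page: PyObject) {
        let rt = pyo3_async_runtimes::tokio::get_runtime();
        rt.spawn(async move {
            // recv() yields Err once the sender is dropped, ending the task.
            while let Ok(item) = rx.recv().await {
                Python::with_gil(|py| {
                    // Errors from the Python callback are ignored, as in the
                    // crate's own loop.
                    let _ = on_page.call(py, (item,), None);
                });
            }
        });
    }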
@@ -178,7 +179,7 @@
             _ => 0,
         };

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
             if headless {
                 website.crawl().await;
             } else {
@@ -237,8 +238,9 @@ impl Website {
             Some(handle) => handle.0 + 1,
             _ => 0,
         };
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
             if headless {
                 website.crawl().await;
             } else {
@@ -283,8 +285,9 @@ impl Website {
         let mut rx2 = website
             .subscribe(*BUFFER / 2)
             .expect("sync feature should be enabled");
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

-        let handle = spider::tokio::spawn(async move {
+        let handle = rt.spawn(async move {
             while let Ok(res) = rx2.recv().await {
                 let page = new_page(&res, raw_content);
                 Python::with_gil(|py| {
@@ -298,7 +301,7 @@
             _ => 0,
         };

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
             website.crawl_smart().await;
         });

@@ -344,13 +347,14 @@ impl Website {
         _ => {
             if background {
                 let mut website = slf.inner.clone();
+                let rt = pyo3_async_runtimes::tokio::get_runtime();

                 let crawl_id = match slf.crawl_handles.last() {
                     Some(handle) => handle.0 + 1,
                     _ => 0,
                 };

-                let crawl_handle = spider::tokio::spawn(async move {
+                let crawl_handle = rt.spawn(async move {
                     website.crawl_smart().await;
                 });

@@ -388,8 +392,9 @@ impl Website {
         let mut rx2 = website
             .subscribe(*BUFFER / 2)
             .expect("sync feature should be enabled");
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

-        let handle = spider::tokio::spawn(async move {
+        let handle = rt.spawn(async move {
             while let Ok(res) = rx2.recv().await {
                 let page = new_page(&res, raw_content);

@@ -405,7 +410,7 @@
             _ => 0,
         };

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
             if headless {
                 website.scrape().await;
             } else {
@@ -459,13 +464,14 @@ impl Website {
         _ => {
             if background {
                 let mut website = slf.inner.clone();
+                let rt = pyo3_async_runtimes::tokio::get_runtime();

                 let crawl_id = match slf.crawl_handles.last() {
                     Some(handle) => handle.0 + 1,
                     _ => 0,
                 };

-                let crawl_handle = spider::tokio::spawn(async move {
+                let crawl_handle = rt.spawn(async move {
                     if headless {
                         website.scrape().await;
                     } else {
@@ -498,8 +504,9 @@ impl Website {
             .subscribe(*BUFFER / 2)
             .expect("sync feature should be enabled");
         let raw_content = slf.raw_content;
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

-        let handler = spider::tokio::spawn(async move {
+        let handler = rt.spawn(async move {
             while let Ok(res) = rx2.recv().await {
                 Python::with_gil(|py| {
                     let _ = callback.call(py, (new_page(&res, raw_content),), None);
@@ -603,28 +610,19 @@ impl Website {
                 let py = slf.py();
                 let dict = obj.downcast_bound::<pyo3::types::PyDict>(py);

-                match dict {
-                    Ok(keys) => {
-                        for key in keys.into_iter() {
-                            let header_key = spider::reqwest::header::HeaderName::from_str(&key.0.to_string());
-
-                            match header_key {
-                                Ok(hn) => {
-                                    let header_value = key.1.to_string();
-
-                                    match spider::reqwest::header::HeaderValue::from_str(&header_value) {
-                                        Ok(hk) => {
-                                            h.append(hn, hk);
-                                        }
-                                        _ => (),
-                                    }
-                                }
-                                _ => (),
-                            }
-                        }
-                        slf.inner.with_headers(Some(h));
-                    }
-                    _ => (),
-                }
+                if let Ok(keys) = dict {
+                    for key in keys.into_iter() {
+                        let header_key = spider::reqwest::header::HeaderName::from_str(&key.0.to_string());
+
+                        if let Ok(hn) = header_key {
+                            let header_value = key.1.to_string();
+
+                            if let Ok(hk) = spider::reqwest::header::HeaderValue::from_str(&header_value) {
+                                h.append(hn, hk);
+                            }
+                        }
+                    }
+                    slf.inner.with_headers(Some(h));
+                }
             }
         }
         _ => {
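The header-parsing rewrite in this last hunk is behavior-preserving: a match whose only meaningful arm is Ok(..) and whose fallback is _ => () collapses to if let, dropping two levels of nesting. A small free-standing illustration of the equivalence (i32 parsing as a stand-in for the header parsing above):

    use std::str::FromStr;

    fn demo(raw: &str) {
        // Before: match with an empty fallback arm.
        match i32::from_str(raw) {
            Ok(n) => println!("parsed {n}"),
            _ => (),
        }

        // After: the equivalent, flatter if let.
        if let Ok(n) = i32::from_str(raw) {
            println!("parsed {n}");
        }
    }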
