From 9a375d8a10610b2fa8767b785eca1653300cbc74 Mon Sep 17 00:00:00 2001 From: Malted Date: Tue, 10 Sep 2024 11:42:22 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Update=20old=20repl=20fetch=20fu?= =?UTF-8?q?nction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 2 +- Cargo.toml | 2 +- examples/repls.rs | 29 ++++-- src/graphql/profilerepls-query.graphql | 46 ++++----- src/replit_graphql.rs | 124 ++++++++++++++----------- 5 files changed, 116 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9ab946..0293b2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2079,7 +2079,7 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "replit-takeout" -version = "1.7.8" +version = "1.7.10" dependencies = [ "airtable-api", "anyhow", diff --git a/Cargo.toml b/Cargo.toml index 31d332e..7bff0d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "replit-takeout" -version = "1.7.8" +version = "1.7.10" edition = "2021" authors = ["Ben Dixon "] diff --git a/examples/repls.rs b/examples/repls.rs index f0c76c9..ded8907 100644 --- a/examples/repls.rs +++ b/examples/repls.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use anyhow::Result; use dotenv::var; use log::error; -use replit_takeout::replit::repls::Repl; +use replit_takeout::{replit::repls::Repl, replit_graphql::ProfileRepls}; #[tokio::main] async fn main() -> Result<()> { @@ -12,18 +12,29 @@ async fn main() -> Result<()> { let token = var("REPLIT_TEST_TOKEN")?; - let repls = Repl::fetch(&token, None).await.expect("some repls"); - error!("got {} repls", repls.len()); + //#region New method + // { + // let repls = Repl::fetch(&token, None).await?; + // error!("got {} repls", repls.len()); - let mut map: HashMap = HashMap::new(); + // let mut map: HashMap = HashMap::new(); - for repl in repls { - if map.contains_key(&repl.id) { - log::error!("ALREADY CONTAINS {:?}", repl.clone()); - 
} + // for repl in repls { + // if map.contains_key(&repl.id) { + // log::error!("ALREADY CONTAINS {:?}", repl.clone()); + // } - map.insert(repl.id.clone(), repl); + // map.insert(repl.id.clone(), repl); + // } + // } + //#endregion + + //#region Old, fixed method + { + let repls = ProfileRepls::fetch(&token, 222834, None).await?; + println!("{:#?}", repls.len()); } + //#endregion Ok(()) } diff --git a/src/graphql/profilerepls-query.graphql b/src/graphql/profilerepls-query.graphql index 94e3008..75785b3 100644 --- a/src/graphql/profilerepls-query.graphql +++ b/src/graphql/profilerepls-query.graphql @@ -1,24 +1,24 @@ -query ProfileRepls($after: String, $id: Int!) { - user(id: $id) { - profileRepls(after: $after, count: 100) { - # Max per page is 25 - items { - id - slug - title - url - description - isRenamed - isAlwaysOn - isProjectFork - likeCount - language - timeCreated - } - pageInfo { - hasNextPage - nextCursor - } - } - } +query ProfileRepls($after: String, $user_id: Int!) { + user(id: $user_id) { + profileRepls(after: $after, count: 100) { + # Max per page is 25 + items { + id + slug + title + url + description + isRenamed + isAlwaysOn + isProjectFork + likeCount + language + timeCreated + } + pageInfo { + hasNextPage + nextCursor + } + } + } } diff --git a/src/replit_graphql.rs b/src/replit_graphql.rs index bbcc63d..3dd7112 100644 --- a/src/replit_graphql.rs +++ b/src/replit_graphql.rs @@ -10,7 +10,7 @@ use reqwest::{ use std::sync::Arc; use std::time::Duration; use time::OffsetDateTime; -use tokio::fs; +use tokio::{fs, time::sleep}; use serde::{Deserialize, Serialize}; @@ -118,58 +118,76 @@ type DateTime = String; )] pub struct ProfileRepls; impl ProfileRepls { - // /// Get one page of repls. 
- // #[deprecated] - // async fn fetch( - // token: &String, - // id: i64, - // client_opt: Option, - // after: Option, - // ) -> Result<( - // Vec, - // Option, - // )> { - // let client = create_client(token, client_opt)?; - - // let repls_query = ProfileRepls::build_query(profile_repls::Variables { id, after }); - - // let repls_data: String = client - // .post(REPLIT_GQL_URL) - // .json(&repls_query) - // .send() - // .await? - // .text() - // .await?; - // debug!( - // "{}:{} Raw text repl data: {repls_data}", - // std::line!(), - // std::column!() - // ); - - // let repls_data_result = - // match serde_json::from_str::>(&repls_data) { - // Ok(data) => data.data, - // Err(e) => { - // error!("Failed to deserialize JSON: {}", e); - // return Err(anyhow::Error::new(e)); - // } - // }; - - // let next_page = repls_data_result - // .as_ref() - // .and_then(|data| { - // data.user - // .as_ref() - // .map(|user| user.profile_repls.page_info.next_cursor.clone()) - // }) - // .ok_or(anyhow::Error::msg("Page Info not found during download"))?; - - // let repls = repls_data_result - // .and_then(|data| data.user.map(|user| user.profile_repls.items)) - // .ok_or(anyhow::Error::msg("Repls not found during download"))?; - - // Ok((repls, next_page)) - // } + /// Fetch every page of a user's profile repls, following the pagination cursor until none is returned.
+ #[deprecated] + pub async fn fetch( + token: &String, + user_id: i64, + client_opt: Option, + ) -> Result> { + let mut all_repls = Vec::new(); + let mut after = None; + let client = create_client(token, client_opt)?; + + loop { + let (repls, next_page) = Self::fetch_page(&client, user_id, after.clone()).await?; + all_repls.extend(repls); + + if let Some(next_cursor) = next_page { + after = Some(next_cursor); + // Add a small delay between requests to avoid rate limiting + sleep(Duration::from_millis(100)).await; + } else { + break; + } + } + + info!("Fetched a total of {} repls", all_repls.len()); + Ok(all_repls) + } + + async fn fetch_page( + client: &Client, + user_id: i64, + after: Option, + ) -> Result<( + Vec, + Option, + )> { + let repls_query = ProfileRepls::build_query(profile_repls::Variables { user_id, after }); + let repls_data: String = client + .post(REPLIT_GQL_URL) + .json(&repls_query) + .send() + .await? + .text() + .await?; + + debug!( + "{}:{} Raw text repl data: {repls_data}", + std::line!(), + std::column!() + ); + + let repls_data_result: Response = + serde_json::from_str(&repls_data).map_err(|e| { + error!("Failed to deserialize JSON: {}", e); + anyhow::Error::new(e) + })?; + + let data = repls_data_result + .data + .ok_or_else(|| anyhow::Error::msg("No data returned from API"))?; + + let user = data + .user + .ok_or_else(|| anyhow::Error::msg("User data not found"))?; + + let next_page = user.profile_repls.page_info.next_cursor; + let repls = user.profile_repls.items; + + Ok((repls, next_page)) + } pub async fn download( token: &String,