Skip to content

Commit

Permalink
Merge pull request #117 from SierraSoftworks/feat/repo-backup
Browse files Browse the repository at this point in the history
feat: Add support for backing up single repositories using from: repos/<owner>/<name>
  • Loading branch information
notheotherben authored Dec 9, 2024
2 parents 1d76dd8 + 4e0d942 commit ffe90f9
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 100 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,33 @@ backups:
password: "<your personal access token>"
properties:
query: "affiliation=owner" # Additional query parameters to pass to GitHub when fetching repositories

- kind: github/repo
from: "users/another-user"
to: /backups/friend
credentials: !Token "your_github_token"

- kind: github/repo
from: "orgs/my-org"
to: /backups/work
filter: '!repo.fork && repo.name contains "awesome"'

- kind: github/release
from: "orgs/my-org"
to: /backups/releases
filter: '!release.prerelease && !asset.source-code'

# You can also back up single repositories directly if you wish
- kind: github/repo
from: "repos/my-org/repo"
to: /backups/work

# This is particularly useful for backing up release artifacts for
# specific projects.
- kind: github/release
from: "repos/my-org/repo"
to: /backups/releases
filter: '!release.prerelease'
```
### OpenTelemetry Reporting
Expand Down
9 changes: 9 additions & 0 deletions examples/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,12 @@ backups:
- kind: github/star
from: users/notheotherben
to: /backup/github

- kind: github/repo
from: repos/SierraSoftworks/github-backup
to: /backup/github

- kind: github/release
from: repos/SierraSoftworks/github-backup
to: /backup/github-releases
filter: '!release.prerelease'
60 changes: 59 additions & 1 deletion src/helpers/github.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,53 @@ impl MetadataSource for GitHubReleaseAsset {
}
}

/// The kind of GitHub entity named by a backup policy's `from` field.
///
/// Values are normally produced by parsing the `from` string (see the
/// `FromStr` implementation) and are turned into an API path with
/// `api_endpoint`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum GitHubRepoSourceKind {
    /// The current user, declared as `user` and queried under the `user/` API prefix.
    CurrentUser,
    /// A named user, declared as `users/<name>`; holds the `<name>` part.
    User(String),
    /// A named organization, declared as `orgs/<name>`; holds the `<name>` part.
    Org(String),
    /// A single repository, declared as `repos/<owner>/<name>`; holds `<owner>/<name>`.
    Repo(String),
}

impl GitHubRepoSourceKind {
pub fn api_endpoint(&self, artifact_kind: GitHubArtifactKind) -> String {
match self {
GitHubRepoSourceKind::CurrentUser => format!("user/{}", artifact_kind.api_endpoint()),
GitHubRepoSourceKind::User(u) => {
format!("users/{}/{}", u, artifact_kind.api_endpoint())
}
GitHubRepoSourceKind::Org(o) => format!("orgs/{}/{}", o, artifact_kind.api_endpoint()),
GitHubRepoSourceKind::Repo(r) => format!("repos/{}", r),
}
}
}

impl std::str::FromStr for GitHubRepoSourceKind {
    type Err = crate::Error;

    /// Parses a policy `from` declaration into a [`GitHubRepoSourceKind`].
    ///
    /// Accepted forms:
    /// - `user` → `CurrentUser`
    /// - `users/<name>` → `User(<name>)`
    /// - `orgs/<name>` → `Org(<name>)`
    /// - `repos/<owner>/<name>` → `Repo(<owner>/<name>)`
    ///
    /// # Errors
    ///
    /// Returns a user error when the declaration matches none of the forms
    /// above, including a `repos/` declaration whose owner or repository
    /// name segment is empty (e.g. `repos//name` or `repos/owner/`).
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "user" {
            return Ok(GitHubRepoSourceKind::CurrentUser);
        }

        // `users/<name>` and `orgs/<name>` take exactly one segment after the
        // prefix. An empty name is still accepted here, as before, so that
        // callers can report their own more specific validation error.
        if let Some(user) = s.strip_prefix("users/").filter(|rest| !rest.contains('/')) {
            return Ok(GitHubRepoSourceKind::User(user.to_string()));
        }

        if let Some(org) = s.strip_prefix("orgs/").filter(|rest| !rest.contains('/')) {
            return Ok(GitHubRepoSourceKind::Org(org.to_string()));
        }

        // `repos/<owner>/<name>` needs exactly two non-empty segments; an
        // empty owner or name cannot identify a repository.
        if let Some(repo) = s.strip_prefix("repos/") {
            if let Some((owner, name)) = repo.split_once('/') {
                if !owner.is_empty() && !name.is_empty() && !name.contains('/') {
                    return Ok(GitHubRepoSourceKind::Repo(repo.to_string()));
                }
            }
        }

        Err(errors::user(
            &format!("The 'from' declaration '{}' was not valid for a GitHub repository source.", s),
            "Make sure you provide either 'user', 'users/<name>', 'orgs/<name>', or 'repos/<owner>/<name>'",
        ))
    }
}

#[allow(dead_code)]
#[derive(PartialEq, Debug, Clone, serde::Serialize, serde::Deserialize)]
#[derive(PartialEq, Debug, Copy, Clone, serde::Serialize, serde::Deserialize)]
pub enum GitHubArtifactKind {
#[serde(rename = "github/repo")]
Repo,
Expand Down Expand Up @@ -734,4 +779,17 @@ mod tests {
assert_eq!(kind.as_str(), kind_str);
assert_eq!(kind.api_endpoint(), url);
}

#[rstest]
#[case("user", GitHubRepoSourceKind::CurrentUser)]
#[case("users/notheotherben", GitHubRepoSourceKind::User("notheotherben".into()))]
#[case("orgs/sierrasoftworks", GitHubRepoSourceKind::Org("sierrasoftworks".into()))]
#[case("repos/sierrasoftworks/github-backup", GitHubRepoSourceKind::Repo("sierrasoftworks/github-backup".into()))]
fn test_deserialize_gh_repo_source_kind(
    #[case] kind_str: &str,
    #[case] expected_kind: GitHubRepoSourceKind,
) {
    // Each supported `from` declaration must parse into its matching variant.
    let parsed = kind_str.parse::<GitHubRepoSourceKind>().unwrap();
    assert_eq!(parsed, expected_kind);
}
}
176 changes: 104 additions & 72 deletions src/sources/github_releases.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
entities::{Credentials, HttpFile},
errors::{self},
helpers::{
github::{GitHubArtifactKind, GitHubRelease, GitHubRepo},
github::{GitHubArtifactKind, GitHubRelease, GitHubRepo, GitHubRepoSourceKind},
GitHubClient,
},
policy::BackupPolicy,
Expand All @@ -25,29 +25,103 @@ impl GitHubReleasesSource {
}
}

impl GitHubReleasesSource {
    /// Streams the downloadable files belonging to the releases of `repo`.
    ///
    /// Yields nothing when `repo.has_downloads` is false. Otherwise the
    /// releases are paged from `<repo.url>/releases` and, for each release:
    ///
    /// - if the release has a `tarball_url`, a file named
    ///   `<full_name>/<tag_name>/source.tar.gz` is yielded, tagged with the
    ///   `asset.source-code` metadata flag;
    /// - every asset whose state is `"uploaded"` is yielded as
    ///   `<full_name>/<tag_name>/<asset name>`, pointing at
    ///   `<repo.url>/releases/assets/<asset id>`.
    ///
    /// Errors reported while paging releases are forwarded as `Err` items and
    /// iteration continues. The stream ends early if `cancel` is observed to
    /// be set while walking a release's assets.
    fn load_releases<'a>(
        &'a self,
        policy: &'a BackupPolicy,
        repo: &'a GitHubRepo,
        cancel: &'a AtomicBool,
    ) -> impl Stream<Item = Result<HttpFile, crate::Error>> + 'a {
        async_stream::stream! {
            if !repo.has_downloads {
                return;
            }

            // Token credentials are sent as a username with an empty password
            // for file downloads. This does not depend on the release or
            // asset, so it is computed once and cloned per file.
            let download_credentials = match &policy.credentials {
                Credentials::Token(token) => Credentials::UsernamePassword {
                    username: token.clone(),
                    password: "".to_string(),
                },
                creds => creds.clone(),
            };

            let releases_url = format!("{}/releases", repo.url);

            for await release in self.client.get_paginated::<GitHubRelease>(releases_url, &policy.credentials, cancel) {
                let release: GitHubRelease = match release {
                    Ok(release) => release,
                    Err(e) => {
                        yield Err(e);
                        continue;
                    }
                };

                if let Some(tarball_url) = &release.tarball_url {
                    yield Ok(HttpFile::new(format!("{}/{}/source.tar.gz", &repo.full_name, &release.tag_name), tarball_url)
                        .with_metadata_source(repo)
                        .with_metadata_source(&release)
                        .with_metadata("asset.source-code", true)
                        .with_credentials(download_credentials.clone())
                        .with_last_modified(release.published_at));
                }

                for asset in release.assets.iter() {
                    if cancel.load(std::sync::atomic::Ordering::Relaxed) {
                        return;
                    }

                    // Only assets in the "uploaded" state are backed up.
                    if asset.state != "uploaded" {
                        continue;
                    }

                    let asset_url = format!("{}/releases/assets/{}", repo.url, asset.id);

                    yield Ok(HttpFile::new(format!("{}/{}/{}", &repo.full_name, &release.tag_name, &asset.name), asset_url)
                        .with_content_type(Some("application/octet-stream".to_string()))
                        .with_credentials(download_credentials.clone())
                        .with_last_modified(Some(asset.updated_at))
                        .with_metadata_source(repo)
                        .with_metadata_source(&release)
                        .with_metadata_source(asset));
                }
            }
        }
    }
}

impl BackupSource<HttpFile> for GitHubReleasesSource {
fn kind(&self) -> &str {
GitHubArtifactKind::Release.as_str()
}

fn validate(&self, policy: &BackupPolicy) -> Result<(), crate::Error> {
let target = policy.from.as_str().trim_matches('/');
let target: GitHubRepoSourceKind = policy.from.as_str().parse()?;

match target {
"" => Err(errors::user(
"The target field is required for GitHub repository backups.",
"Please provide a target field in the policy using the format 'users/<username>' or 'orgs/<orgname>'.",
GitHubRepoSourceKind::User(u) if u.is_empty() => Err(errors::user(
&format!(
"Your 'from' target '{}' is not a valid GitHub username.",
policy.from.as_str()
),
"Make sure you provide a valid GitHub username in the 'from' field of your policy.",
)),

t if t.chars().filter(|c| *c == '/').count() > 1 => Err(errors::user(
&format!("The target field '{target}' contains too many segments."),
"Please provide a target field in the policy using the format 'users/<username>' or 'orgs/<orgname>'.",
GitHubRepoSourceKind::Org(org) if org.is_empty() => Err(errors::user(
&format!(
"Your 'from' target '{}' is not a valid GitHub organization name.",
policy.from.as_str()
),
"Make sure you provide a valid GitHub organization name in the 'from' field of your policy.",
)),

t if !t.starts_with("users/") && !t.starts_with("orgs/") => Err(errors::user(
&format!("The target field '{target}' does not include a valid user or org specifier."),
"Please specify either 'users/<username>' or 'orgs/<orgname>' as your target.",
GitHubRepoSourceKind::Repo(repo) if repo.is_empty() => Err(errors::user(
&format!(
"Your 'from' target '{}' is not a fully qualified GitHub repository name.",
policy.from.as_str()
),
"Make sure you provide a fully qualified GitHub repository name in the 'from' field of your policy.",
)),

_ => Ok(()),
}
}
Expand All @@ -57,80 +131,38 @@ impl BackupSource<HttpFile> for GitHubReleasesSource {
policy: &'a BackupPolicy,
cancel: &'a AtomicBool,
) -> impl Stream<Item = Result<HttpFile, crate::Error>> + 'a {
let target: GitHubRepoSourceKind = policy.from.as_str().parse().unwrap();
let url = format!(
"{}/{}/{}?{}",
"{}/{}?{}",
policy
.properties
.get("api_url")
.unwrap_or(&"https://api.github.com".to_string())
.trim_end_matches('/'),
&policy.from.trim_matches('/'),
GitHubArtifactKind::Release.api_endpoint(),
target.api_endpoint(GitHubArtifactKind::Release),
policy.properties.get("query").unwrap_or(&"".to_string())
);
)
.trim_end_matches('?')
.to_string();

async_stream::stream! {
for await repo in self.client.get_paginated::<GitHubRepo>(url, &policy.credentials, cancel) {
if let Err(e) = repo {
yield Err(e);
continue;
}

let repo: GitHubRepo = repo.unwrap();
if matches!(target, GitHubRepoSourceKind::Repo(_)) {
let repo: GitHubRepo = self.client.get(url, &policy.credentials, cancel).await?;

if !repo.has_downloads {
continue;
for await file in self.load_releases(policy, &repo, cancel) {
yield file;
}

let releases_url = format!("{}/releases", repo.url);

for await release in self.client.get_paginated::<GitHubRelease>(releases_url, &policy.credentials, cancel) {
if let Err(e) = release {
} else {
for await repo in self.client.get_paginated::<GitHubRepo>(url, &policy.credentials, cancel) {
if let Err(e) = repo {
yield Err(e);
continue;
}

let release: GitHubRelease = release.unwrap();

if let Some(tarball_url) = &release.tarball_url {
yield Ok(HttpFile::new(format!("{}/{}/source.tar.gz", &repo.full_name, &release.tag_name), tarball_url)
.with_metadata_source(&repo)
.with_metadata_source(&release)
.with_metadata("asset.source-code", true)
.with_credentials(match &policy.credentials {
Credentials::Token(token) => Credentials::UsernamePassword {
username: token.clone(),
password: "".to_string(),
},
creds => creds.clone(),
})
.with_last_modified(release.published_at));
}
let repo: GitHubRepo = repo.unwrap();

for asset in release.assets.iter() {
if cancel.load(std::sync::atomic::Ordering::Relaxed) {
return;
}

if asset.state != "uploaded" {
continue;
}

let asset_url = format!("{}/releases/assets/{}", repo.url, asset.id);

yield Ok(HttpFile::new(format!("{}/{}/{}", &repo.full_name, &release.tag_name, &asset.name), asset_url)
.with_content_type(Some("application/octet-stream".to_string()))
.with_credentials(match &policy.credentials {
Credentials::Token(token) => Credentials::UsernamePassword {
username: token.clone(),
password: "".to_string(),
},
creds => creds.clone(),
})
.with_last_modified(Some(asset.updated_at))
.with_metadata_source(&repo)
.with_metadata_source(&release)
.with_metadata_source(asset));
for await file in self.load_releases(policy, &repo, cancel) {
yield file;
}
}
}
Expand Down
Loading

0 comments on commit ffe90f9

Please sign in to comment.