Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding a prune data package method #56

Merged
merged 1 commit into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(obfuscate_gps)
export(obfuscate_lat)
export(obfuscate_lon)
export(othertext_lookup)
export(prune_datapackage)
export(read_excel_all_sheets)
export(read_googlesheets)
export(remove_deletions)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#' @param resource_name Character. Item within the datapackage to be updated
#' @param resource_path Character. Path to csv file
#' @param data_package_path Character. Path to datapackage.json file
#' @param prune_datapackage Logical. Should properties not in the structural metadata
#' be removed?
#'
#' @return Updates the datapackage, returns nothing
#' @export
Expand Down Expand Up @@ -46,7 +48,8 @@
expand_frictionless_metadata <- function(structural_metadata,
resource_name,
resource_path,
data_package_path ){
data_package_path,
prune_datapackage = TRUE){

data_package <- frictionless::read_package(data_package_path)

Expand Down Expand Up @@ -82,6 +85,10 @@ expand_frictionless_metadata <- function(structural_metadata,
my_data_schema$fields[[idx]] <- x
}

if(prune_datapackage){
my_data_schema <- prune_datapackage(my_data_schema,structural_metadata)
}

# update the datapackage.json
data_package <- data_package|>
frictionless::remove_resource(resource_name) |>
Expand All @@ -95,3 +102,37 @@ expand_frictionless_metadata <- function(structural_metadata,

invisible()
}


#' Prune data package
#'
#' Removes properties from the field metadata of a dataset in a datapackage
#' when they are not named in the structural metadata. The `name` and `type`
#' properties are always kept.
#'
#' @param my_data_schema list. schema object from frictionless; each item of
#'   its `fields` element is pruned
#' @param structural_metadata dataframe. structural metadata for a dataset;
#'   its names define which properties are kept
#'
#' @return list. Copy of `my_data_schema` in which every item of `fields`
#'   holds only the retained properties; all other elements are unchanged
#' @export
#'
prune_datapackage <- function(my_data_schema, structural_metadata) {

  # properties to retain: the minimal set plus every structural metadata name
  properties_to_keep <- unique(c("name", "type", names(structural_metadata)))

  # start from a copy so elements other than fields pass through untouched
  my_data_schema_pruned <- my_data_schema

  # keep only the retained properties in each field
  my_data_schema_pruned$fields <- lapply(
    my_data_schema$fields,
    function(schema_item) {
      # logical mask: TRUE for properties that stay in the schema item
      keep <- names(schema_item) %in% properties_to_keep
      schema_item[keep]
    }
  )

  my_data_schema_pruned
}
8 changes: 6 additions & 2 deletions man/expand_frictionless_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/prune_datapackage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions vignettes/metadata.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@ expand_frictionless_metadata(structural_metadata = structural_metadata,
resource_path = "data_examples/my_data.csv",
data_package_path = "data_examples/datapackage.json")

## remove an element from the structural metadata and datapackage

# dropping the comments field (the fifth column) so it is pruned from the datapackage
structural_metadata <- structural_metadata[-5]

expand_frictionless_metadata(structural_metadata = structural_metadata,
resource_name = "my_data", # name of the file with no extension
resource_path = "data_examples/my_data.csv",
data_package_path = "data_examples/datapackage.json",
prune_datapackage = TRUE) # this is the default

# there are methods for embargoing or restricting deposits in {deposits}


Expand Down