Skip to content

Commit

Permalink
adding a prune data package method
Browse files Browse the repository at this point in the history
  • Loading branch information
collinschwantes committed Sep 11, 2024
1 parent 9b1c4bf commit 3214aed
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 3 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(obfuscate_gps)
export(obfuscate_lat)
export(obfuscate_lon)
export(othertext_lookup)
export(prune_datapackage)
export(read_excel_all_sheets)
export(read_googlesheets)
export(remove_deletions)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#' @param resource_name Character. Item within the datapackage to be updated
#' @param resource_path Character. Path to csv file
#' @param data_package_path Character. Path to datapackage.json file
#' @param prune_datapackage Logical. Should properties not in the structural metadata
#' be removed?
#'
#' @return Updates the datapackage, returns nothing
#' @export
Expand Down Expand Up @@ -46,7 +48,8 @@
expand_frictionless_metadata <- function(structural_metadata,
resource_name,
resource_path,
data_package_path ){
data_package_path,
prune_datapackage = TRUE){

data_package <- frictionless::read_package(data_package_path)

Expand Down Expand Up @@ -82,6 +85,10 @@ expand_frictionless_metadata <- function(structural_metadata,
my_data_schema$fields[[idx]] <- x
}

if(prune_datapackage){
my_data_schema <- prune_datapackage(my_data_schema,structural_metadata)
}

# update the datapackage.json
data_package <- data_package|>
frictionless::remove_resource(resource_name) |>
Expand All @@ -95,3 +102,37 @@ expand_frictionless_metadata <- function(structural_metadata,

invisible()
}


#' Prune data package
#'
#' Removes properties from every field of a dataset's schema unless they are
#' one of the minimal properties (`name`, `type`) or match a column name of
#' the structural metadata.
#'
#' @param my_data_schema list. schema object from frictionless; its `fields`
#' element is the part that gets pruned
#' @param structural_metadata dataframe. structural metadata for a dataset;
#' its column names are the properties that are kept
#'
#' @return A copy of `my_data_schema` whose `fields` only carry the retained
#' properties. All other elements of the schema are left untouched.
#' @export
#'
prune_datapackage <- function(my_data_schema, structural_metadata){

  # properties allowed to stay on a field: the minimal "name" and "type",
  # followed by every column of the structural metadata (duplicates removed)
  allowed_properties <- unique(c("name", "type", names(structural_metadata)))

  # subset a single field down to the allowed properties
  keep_allowed <- function(field){
    is_allowed <- names(field) %in% allowed_properties
    field[is_allowed]
  }

  # work on a copy so everything outside of `fields` is carried over as-is
  pruned_schema <- my_data_schema
  pruned_schema$fields <- purrr::map(my_data_schema$fields, keep_allowed)

  pruned_schema
}
8 changes: 6 additions & 2 deletions man/expand_frictionless_metadata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/prune_datapackage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions vignettes/metadata.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@ expand_frictionless_metadata(structural_metadata = structural_metadata,
resource_path = "data_examples/my_data.csv",
data_package_path = "data_examples/datapackage.json")
## remove an element from the structural metadata and datapackage
# dropping the comments field (column 5) from the structural metadata
structural_metadata <- structural_metadata[-5]
expand_frictionless_metadata(structural_metadata = structural_metadata,
resource_name = "my_data", # name of the file with no extension
resource_path = "data_examples/my_data.csv",
data_package_path = "data_examples/datapackage.json",
prune_datapackage = TRUE) # this is the default
# there are methods for embargoing or restricting deposits in {deposits}
Expand Down

0 comments on commit 3214aed

Please sign in to comment.