Skip to content

Commit

Permalink
sending to CRAN with fix suggested by Ivan Krylov
Browse files Browse the repository at this point in the history
  • Loading branch information
pachadotdev committed Nov 15, 2024
1 parent bbb6524 commit 6f5fcce
Show file tree
Hide file tree
Showing 12 changed files with 94 additions and 99 deletions.
1 change: 0 additions & 1 deletion rpkg/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
^vignettes/CP2017CHL$
^\.covrignore$
^cran-comments\.md$
^inst/extdata/galapagos$
^_pkgdown\.yml$
^microdata$
^redatam$
Expand Down
1 change: 0 additions & 1 deletion rpkg/.gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
.vscode
dev
inst/extdata/galapagos
src/*.o
src/*.so
src/redatamlib/*.o
Expand Down
8 changes: 6 additions & 2 deletions rpkg/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,19 @@ Authors@R: c(
given = "Lital",
family = "Barkai",
role = "aut"),
person(
given = "Ivan",
family = "Krylov",
role = "ctb"),
person(
given = "Arseny",
family = "Kapoulkine",
role = "ctb",
comment = "'pugixml' C++ library"),
person(
family = "Republic of Ecuador",
family = "Oriental Republic of Uruguay",
role = "dtc",
comment = "Galapagos census data")
comment = "Aggregated census data from 2011")
)
Imports:
data.table,
Expand Down
13 changes: 2 additions & 11 deletions rpkg/NEWS.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
# redatam 2.0.4
# redatam 2.0.1

* Refactored C++ to R list casting to avoid growing a list in a loop. It now
creates a list with a length equal to the number of entities and variables
with descriptions and then fills it with the data. This is 3 to 5 times
faster.

# redatam 2.0.3

* Fixes memory management issues suggested by Ivan Krylov regarding the C++ to R
list casting.
* Uses 2 threads during R CMD check

# redatam 2.0.1

* Fixes memory leaks warned by CRAN on clang-ASAN and gcc-UBSAN.
* Fixes GCC-SAN errors with a fix proposed by Ivan Krylov. Thanks!

# redatam 2.0.0

Expand Down
10 changes: 6 additions & 4 deletions rpkg/R/read_redatam.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
#' @return A list of tibbles, each one representing a table with the census
#' entities (or levels) and their attributes (or variables).
#' @examples
#' # Read a dictionary file (Galapagos 2015)
#' zip <- system.file("extdata", "galapagos.zip", package = "redatam")
#' dout <- paste(tempdir(), "galapagos", sep = "/")
#' # Read a dictionary file (Uruguay 2011, aggregated)
#' zip <- system.file("extdata", "uru2011mini.zip", package = "redatam")
#' dout <- paste(tempdir(), "uru2011mini", sep = "/")
#' unzip(zip, exdir = dout)
#' read_redatam(paste(dout, "cg15.dic", sep = "/"))
#' read_redatam(paste(dout, "uru2011mini.dic", sep = "/"))
#' # or equivalently
#' read_redatam(paste(dout, "uru2011mini.dicx", sep = "/"))
#' @export
read_redatam <- function(dictionary) {
dictionary <- normalizePath(dictionary)
Expand Down
Binary file removed rpkg/inst/extdata/galapagos.zip
Binary file not shown.
Binary file added rpkg/inst/extdata/uru2011mini.zip
Binary file not shown.
10 changes: 6 additions & 4 deletions rpkg/man/read_redatam.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion rpkg/man/redatam-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 13 additions & 9 deletions rpkg/src/redatamlib/readers/ByteArrayReader.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <algorithm> // std::search
#include <fstream> // std::ifstream
#include <iterator> // std::ostream_iterator, std::istreambuf_iterator
#include <iterator> // std::ostream_iterator, std::istreambuf_iterator
#include <sstream> // std::ostringstream

#include "ByteArrayReader.hpp"
Expand Down Expand Up @@ -175,23 +175,27 @@ unsigned char ByteArrayReader::ReadByte() {
}

// Combines two successive ReadByte() results into a little-endian 16-bit
// value: the first byte read becomes the low byte, the second the high byte.
uint16_t ByteArrayReader::ReadInt16LE() {
  // Each read gets its own statement so the two ReadByte() calls are
  // sequenced; the operands of a single `|` expression have no guaranteed
  // evaluation order, which could swap the bytes.
  uint16_t a = static_cast<uint16_t>(ReadByte());
  uint16_t b = static_cast<uint16_t>(ReadByte()) << 8;
  return a | b;
}

// Combines two successive ReadInt16LE() results into a little-endian 32-bit
// value: the first 16-bit word read becomes the low half, the second the
// high half.
uint32_t ByteArrayReader::ReadInt32LE() {
  // Separate statements sequence the two reads; inside one `|` expression
  // the order in which the operands are evaluated is not guaranteed.
  uint32_t a = static_cast<uint32_t>(ReadInt16LE());
  uint32_t b = static_cast<uint32_t>(ReadInt16LE()) << 16;
  return a | b;
}

// Combines two successive ReadByte() results into a big-endian 16-bit value:
// the first byte read becomes the high byte, the second the low byte.
uint16_t ByteArrayReader::ReadInt16BE() {
  // Each read gets its own statement so the two ReadByte() calls are
  // sequenced; the operands of a single `|` expression have no guaranteed
  // evaluation order, which could swap the bytes.
  uint16_t a = static_cast<uint16_t>(ReadByte()) << 8;
  uint16_t b = static_cast<uint16_t>(ReadByte());
  return a | b;
}

// Combines two successive ReadInt16BE() results into a big-endian 32-bit
// value: the first 16-bit word read becomes the high half, the second the
// low half.
uint32_t ByteArrayReader::ReadInt32BE() {
  // Separate statements sequence the two reads; inside one `|` expression
  // the order in which the operands are evaluated is not guaranteed.
  uint32_t a = static_cast<uint32_t>(ReadInt16BE()) << 16;
  uint32_t b = static_cast<uint32_t>(ReadInt16BE());
  return a | b;
}

} // namespace RedatamLib
66 changes: 0 additions & 66 deletions rpkg/tests/testthat/test-galapagos.R

This file was deleted.

59 changes: 59 additions & 0 deletions rpkg/tests/testthat/test-uruguay.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# the Uruguay 2011 census was downloaded from
# https://redatam.org/cdr/descargas/censos/poblacion/CP2011URY.zip
# I aggregated the data to test the code

# read DICX
# when converting the original DIC to DICX in REDATAM 7:
# 1. it doesn't include all the labels
# 2. it adds MISSING and NOTAPPLICABLE to the labels
# 3. it creates problems with lower and upper case (e.g., CG150001.ptr and
# cg150001.ptr)

test_that("reading works", {
  # Bundled example archive and the scratch directory it is extracted into.
  zip <- system.file("extdata", "uru2011mini.zip", package = "redatam")

  dout <- paste(tempdir(), "uru2011mini", sep = "/")

  # Remove any leftover extraction so the file listings below only see
  # freshly unzipped files.
  if (file.exists(dout)) {
    unlink(dout, recursive = TRUE)
  }

  # unzip the file
  if (!file.exists(dout)) {
    unzip(zip, exdir = dout)
  }

  # find the dictionaries (one per format)
  dic <- list.files(
    dout,
    pattern = "\\.dic$", full.names = TRUE, recursive = TRUE
  )

  dicx <- list.files(
    dout,
    pattern = "\\.dicx$", full.names = TRUE, recursive = TRUE
  )

  # read DIC

  res <- read_redatam(dic)

  expect_type(res, "list")
  expect_length(res, 3L)

  d <- res$sexo
  expect_s3_class(d, "data.frame")
  expect_true(is.factor(d$sexo))
  expect_equal(dim(d), c(38L, 4L))

  daux <- res$sexo_labels_cuenta
  expect_true(is.factor(daux$cuenta_description))
  expect_equal(dim(daux), c(2L, 2L))

  # read DICX

  res2 <- read_redatam(dicx)

  expect_type(res2, "list")
  expect_length(res2, 2L)

  d2 <- res2$sexo
  expect_s3_class(d2, "data.frame")
  expect_equal(dim(d2), c(38L, 4L))
})

0 comments on commit 6f5fcce

Please sign in to comment.