From 16a4bbbd7a194900852b2e346b395a360e1a49f4 Mon Sep 17 00:00:00 2001
From: Leon Reteig <leonreteig@gmail.com>
Date: Mon, 5 Feb 2024 16:33:14 -0500
Subject: [PATCH] Add extra residue_type column

The new residue_type column labels each entry as either "eplet" or
"reactivity pattern" (entries whose name contains a "+").
---
 R/eplet_registry.R                   |  4 ++++
 tests/testthat/test-eplet_registry.R | 21 +++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/R/eplet_registry.R b/R/eplet_registry.R
index c93a798..cd2b889 100644
--- a/R/eplet_registry.R
+++ b/R/eplet_registry.R
@@ -293,6 +293,10 @@ scrape_eplet_registry <- function(file_path) {
   ) |>
     purrr::map(tidyr::as_tibble) |> # make a dataframe out of each scraped db
     purrr::list_rbind() |> # combine into one dataframe
+    dplyr::mutate(residue_type = dplyr::case_when(
+      stringr::str_detect(.data$name, "\\+") ~ "reactivity pattern",
+      .default = "eplet"
+    ), .after = "name") |>
     # get full description from info if it exists
     dplyr::mutate(description = dplyr::coalesce(
       .data$descr_info,
diff --git a/tests/testthat/test-eplet_registry.R b/tests/testthat/test-eplet_registry.R
index 22c2d22..298db0a 100644
--- a/tests/testthat/test-eplet_registry.R
+++ b/tests/testthat/test-eplet_registry.R
@@ -179,14 +179,15 @@ test_that("load_eplet_registry prints message when print_version = TRUE", {
   expect_message(load_eplet_registry(print_version = TRUE))
 })
 
-test_that("table has 8 columns", {
-  expect_equal(length(df_eplets), 8)
+test_that("table has 9 columns", {
+  expect_length(df_eplets, 9)
+})
 
 test_that("column names and types are correct", {
   eplet_registry_info <- c(
     id = "character",
     name = "character",
+    residue_type = "character",
     description = "character",
     exposition = "character",
     confirmation = "character",
@@ -208,6 +209,12 @@ test_that("low cardinality character columns contain expected values", {
     c("Very Low", "Low", "Intermediate", "High", NA)
   )
 
+  # residue type
+  expect_setequal(
+    unique(df_eplets$residue_type),
+    c("eplet", "reactivity pattern")
+  )
+
   # database
   expect_setequal(
     unique(df_eplets$locus_group),
@@ -236,6 +243,16 @@ test_that("table has no duplicate eplets", {
 })
 
 test_that("a few randomly selected cells have same value as on the website", {
+  expect_equal(
+    dplyr::pull(df_eplets[df_eplets$name == "3P", ], "residue_type")[1],
+    "eplet"
+  )
+
+  expect_equal(
+    dplyr::pull(df_eplets[df_eplets$name == "77N+85VG", ], "residue_type")[1],
+    "reactivity pattern"
+  )
+
   expect_equal(
     dplyr::pull(df_eplets[df_eplets$name == "37Y", ], "exposition")[1],
     "High"