Skip to content

Commit

Permalink
Initial code changes for metadata validation
Browse files Browse the repository at this point in the history
Add scalafmt plugin
  • Loading branch information
vimleshtna committed Sep 12, 2023
1 parent 9fa1432 commit ac72f79
Show file tree
Hide file tree
Showing 10 changed files with 651 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
message: ":warning: Secrets found in repository ${{ inputs.repo-name }}"
slack-url: ${{ secrets.SLACK_WEBHOOK }}
- name: Run tests
run: sbt test
run: sbt scalafmtCheckAll test
4 changes: 4 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// scalafmt release used to format this project.
version = 3.7.11
// Start from scalafmt's default style preset.
preset = default
// Parse sources as Scala 2.13.
runner.dialect = scala213
// Maximum line width before scalafmt wraps.
maxColumn = 180
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ lazy val root = (project in file("."))
.settings(
name := "tdr-metadata-validation",
libraryDependencies ++= Seq(
commonsLang3,
scalaTest % Test,
)
)
2 changes: 1 addition & 1 deletion project/Dependencies.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import sbt._

/** Library coordinates for the build; build.sbt's `libraryDependencies` refers to `commonsLang3` and `scalaTest`. */
object Dependencies {
// Apache Commons Lang 3 (plain Java artifact, hence the single `%`).
lazy val commonsLang3 = "org.apache.commons" % "commons-lang3" % "3.13.0"
// `%%` appends the Scala binary version to the artifact name.
lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.12"
lazy val mockito = "org.mockito" %% "mockito-scala" % "1.17.7"
}
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// sbt plugins available to the build definition.
addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0")
// Supplies the scalafmt tasks; the CI workflow runs `scalafmtCheckAll` before the tests.
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0")
// NOTE(review): JCenter has been sunset upstream — confirm whether this resolver is still required.
resolvers += Resolver.jcenterRepo
// Presumably used for publishing signed artifacts to Sonatype — confirm against the release process.
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.21")
addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.2.1")
157 changes: 157 additions & 0 deletions src/main/scala/uk/gov/nationalarchives/tdr/validation/DataType.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package uk.gov.nationalarchives.tdr.validation

import uk.gov.nationalarchives.tdr.validation.ErrorCode._

import java.time.{LocalDateTime, Year}
import scala.util.control.Exception.allCatch

sealed trait DataType extends AnyRef

/** Validator for metadata values that must be whole, non-negative numbers. */
case object Integer extends AnyRef with DataType with Product with Serializable {

  /** Checks `value` against the integer rules.
    *
    * @param value
    *   raw metadata value as entered
    * @param criteria
    *   rules for the property; only `required` is consulted here
    * @return
    *   the error code of the first failing rule, or `None` when the value is acceptable
    */
  def checkValue(value: String, criteria: MetadataCriteria): Option[String] =
    value match {
      case "" if criteria.required => Some(EMPTY_VALUE_ERROR)
      // A blank value for an optional property means "not provided"; without this case it
      // would fall through to the number parse and be reported as NUMBER_ONLY_ERROR.
      case "" => None
      case text =>
        // Parse once and reuse the result for the sign check.
        allCatch.opt(text.toInt) match {
          case None             => Some(NUMBER_ONLY_ERROR)
          case Some(n) if n < 0 => Some(NEGATIVE_NUMBER_ERROR)
          case Some(_)          => None
        }
    }
}

/** Validator for date-time metadata values.
  *
  * The value is split on `-`, `:` and `T`; the first three parts are read as year, month and day
  * (for example `2023-09-12T00:00:00`). Any further parts (the time) are not inspected.
  */
case object DateTime extends AnyRef with DataType with Product with Serializable {

  /** Checks `value` against the date rules.
    *
    * @param value
    *   raw metadata value as entered
    * @param criteria
    *   rules for the property; `required` and `isFutureDateAllowed` are consulted
    * @return
    *   the error code of the first failing rule, or `None` when the value is acceptable
    */
  def checkValue(value: String, criteria: MetadataCriteria): Option[String] =
    value match {
      case "" if criteria.required => Some(EMPTY_VALUE_ERROR)
      case ""                      => None
      case v =>
        // Limit -1 keeps trailing empty parts, so "2023--" yields ("2023", "", "") and gets a
        // per-field error instead of losing the blank day and month.
        v.replace("T", "-").split("[-:]", -1) match {
          case Array(year, month, day, _*) => validate(day, month, year, criteria)
          // Fewer than three parts: indexing the array here used to throw
          // ArrayIndexOutOfBoundsException; report a format error instead.
          case _ => Some(INVALID_DATE_FORMAT_ERROR)
        }
    }

  val isInvalidDay: Int => Boolean = (day: Int) => day < 1 || day > 31
  val isInvalidMonth: Int => Boolean = (month: Int) => month < 1 || month > 12
  val isInvalidYear: Int => Boolean = (year: Int) => year.toString.length != 4
  val isALeapYear: Int => Boolean = (year: Int) => Year.of(year).isLeap

  lazy val monthsWithLessThan31Days: Map[Int, String] = Map(
    2 -> "February",
    4 -> "April",
    6 -> "June",
    9 -> "September",
    11 -> "November"
  )

  /** Treats a date whose day, month and year are all blank as "no value"; otherwise validates the parts. */
  private def validate(day: String, month: String, year: String, criteria: MetadataCriteria): Option[String] = {
    val emptyDate = day.isEmpty && month.isEmpty && year.isEmpty

    if (!emptyDate) validateDateValues(day, month, year, criteria)
    else if (criteria.required) Some(EMPTY_VALUE_ERROR)
    else None
  }

  /** Runs the checks in order (day, month, year, day-for-month, future date) and returns the first error. */
  private def validateDateValues(day: String, month: String, year: String, criteria: MetadataCriteria): Option[String] =
    // `orElse` takes its argument by name, so the `toInt` calls only run after the
    // corresponding part has been validated as a number.
    validateDay(day)
      .orElse(validateMonth(month))
      .orElse(validateYear(year))
      .orElse(checkDayForTheMonthAndYear(day.toInt, month.toInt, year.toInt))
      .orElse(checkIfFutureDateIsAllowed(day.toInt, month.toInt, year.toInt, criteria))

  /** Shared check for one numeric date part: blank, not a number, negative, then out of range. */
  private def validateField(
      raw: String,
      outOfRange: Int => Boolean,
      emptyError: String,
      notNumberError: String,
      negativeError: String,
      invalidError: String
  ): Option[String] =
    if (raw.isEmpty) Some(emptyError)
    else
      allCatch.opt(raw.toInt) match {
        case None                     => Some(notNumberError)
        case Some(n) if n < 0         => Some(negativeError)
        case Some(n) if outOfRange(n) => Some(invalidError)
        case Some(_)                  => None
      }

  private def validateDay(day: String): Option[String] =
    validateField(day, isInvalidDay, EMPTY_VALUE_ERROR_FOR_DAY, NUMBER_ERROR_FOR_DAY, NEGATIVE_NUMBER_ERROR_FOR_DAY, INVALID_NUMBER_ERROR_FOR_DAY)

  private def validateMonth(month: String): Option[String] =
    validateField(month, isInvalidMonth, EMPTY_VALUE_ERROR_FOR_MONTH, NUMBER_ERROR_FOR_MONTH, NEGATIVE_NUMBER_ERROR_FOR_MONTH, INVALID_NUMBER_ERROR_FOR_MONTH)

  private def validateYear(year: String): Option[String] =
    validateField(year, isInvalidYear, EMPTY_VALUE_ERROR_FOR_YEAR, NUMBER_ERROR_FOR_YEAR, NEGATIVE_NUMBER_ERROR_FOR_YEAR, INVALID_NUMBER_ERROR_FOR_YEAR)

  /** Rejects day 31 in a 30-day month, days 30 and 31 in February, and 29 February outside a leap year. */
  private def checkDayForTheMonthAndYear(dayNumber: Int, monthNumber: Int, yearNumber: Int): Option[String] = {
    val isFebruary = monthNumber == 2
    val tooLateForShortMonth = dayNumber > 30 && monthsWithLessThan31Days.contains(monthNumber)
    val thirtiethOfFebruary = dayNumber == 30 && isFebruary

    if (tooLateForShortMonth || thirtiethOfFebruary || (dayNumber == 29 && isFebruary && !isALeapYear(yearNumber))) {
      Some(INVALID_DAY_FOR_MONTH_ERROR)
    } else {
      None
    }
  }

  /** Flags dates after the current moment unless the criteria allow future dates; the date is compared at 00:00. */
  private def checkIfFutureDateIsAllowed(day: Int, month: Int, year: Int, criteria: MetadataCriteria): Option[String] =
    if (!criteria.isFutureDateAllowed && LocalDateTime.now().isBefore(LocalDateTime.of(year, month, day, 0, 0))) {
      Some(FUTURE_DATE_ERROR)
    } else {
      None
    }
}

/** Validator for text metadata values, optionally restricted to a list of defined values. */
case object Text extends AnyRef with DataType with Product with Serializable {

  /** Checks `value` against the text rules.
    *
    * When the criteria list no defined values, only the required/blank rule applies. Otherwise the
    * value is split on commas and each entry must be one of the defined values.
    *
    * @param value
    *   raw metadata value as entered
    * @param criteria
    *   rules for the property; `required`, `definedValues` and `isMultiValueAllowed` are consulted
    * @return
    *   the error code of the first failing rule, or `None` when the value is acceptable
    */
  def checkValue(value: String, criteria: MetadataCriteria): Option[String] = {
    val permitted = criteria.definedValues
    // Only needed when a defined-value list exists, hence lazy.
    lazy val entries = value.split(",").toList

    if (value == "" && criteria.required) Some(EMPTY_VALUE_ERROR)
    else if (permitted.isEmpty) None
    else if (!criteria.isMultiValueAllowed && entries.length > 1) Some(MULTI_VALUE_ERROR)
    else if (entries.exists(entry => !permitted.contains(entry))) Some(UNDEFINED_VALUE_ERROR)
    else None
  }
}

/** Validator for option-style metadata values that may depend on another property being filled in. */
case object Boolean extends AnyRef with DataType with Product with Serializable {

  /** Checks `value` against the rules for this data type.
    *
    * @param value
    *   raw metadata value as entered
    * @param criteria
    *   rules for the property; `required`, `requiredProperty` and `definedValues` are consulted
    * @param requiredMetadata
    *   the metadata entry for the property named by `criteria.requiredProperty`, if the caller found one
    * @return
    *   the error code of the first failing rule, or `None` when the value is acceptable
    */
  def checkValue(value: String, criteria: MetadataCriteria, requiredMetadata: Option[Metadata]): Option[String] = {
    val dependsOnBlankProperty = isRequiredMetadataIsEmpty(criteria, requiredMetadata)

    if (value == "" && criteria.required) {
      // A blank required value is tolerated while the property it depends on is itself blank.
      if (dependsOnBlankProperty) None else Some(NO_OPTION_SELECTED_ERROR)
    } else if (dependsOnBlankProperty) {
      Some(REQUIRED_PROPERTY_IS_EMPTY)
    } else if (criteria.definedValues.contains(value)) {
      None
    } else {
      Some(UNDEFINED_VALUE_ERROR)
    }
  }

  /** True when the criteria name a required property and the supplied entry for it has a blank value. */
  def isRequiredMetadataIsEmpty(criteria: MetadataCriteria, requiredMetadata: Option[Metadata]): Boolean =
    criteria.requiredProperty.nonEmpty && requiredMetadata.exists(entry => entry.value.isEmpty)
}
/** Data type for decimal values. It declares no `checkValue` of its own; `MetadataValidation` checks Decimal-typed values with `Integer.checkValue`. */
case object Decimal extends AnyRef with DataType with Product with Serializable

/** Lookup from the textual data-type name to its [[DataType]] instance. */
object DataType {

  // Names are matched exactly (case-sensitive).
  private val byName: Map[String, DataType] = Map(
    "Integer" -> Integer,
    "DateTime" -> DateTime,
    "Text" -> Text,
    "Boolean" -> Boolean,
    "Decimal" -> Decimal
  )

  /** Resolves a data-type name.
    *
    * @param dataType
    *   one of `Integer`, `DateTime`, `Text`, `Boolean` or `Decimal`
    * @return
    *   the matching data type
    * @throws scala.MatchError
    *   when the name is not one of the five listed above
    */
  def get(dataType: String): DataType =
    byName.getOrElse(dataType, throw new MatchError(dataType))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package uk.gov.nationalarchives.tdr.validation

/** One metadata entry: a property `name` and the raw `value` supplied for it. */
case class Metadata(name: String, value: String)
/** Validation rules for a single metadata property.
  *
  * @param name
  *   property name; matched against `Metadata.name` to pick the entry to validate
  * @param dataType
  *   selects which data-type checker is applied to the value
  * @param required
  *   when true, a blank value is reported as an error by the data-type checkers
  * @param isFutureDateAllowed
  *   consulted by `DateTime`; when false, dates after the current moment are rejected
  * @param isMultiValueAllowed
  *   consulted by `Text`; when false, a comma-separated value with more than one entry is rejected
  * @param definedValues
  *   permitted values, consulted by `Text` and `Boolean`
  * @param requiredProperty
  *   name of another property whose entry is looked up and passed to `Boolean.checkValue`
  * @param dependencies
  *   further criteria keyed by a value of this property (for example the criteria under `"Closed"`)
  * @param defaultValue
  *   value treated as "nothing entered" when checking whether closure metadata exists
  */
case class MetadataCriteria(
name: String,
dataType: DataType,
required: Boolean,
isFutureDateAllowed: Boolean,
isMultiValueAllowed: Boolean,
definedValues: List[String],
requiredProperty: Option[String] = None,
dependencies: Option[Map[String, List[MetadataCriteria]]] = None,
defaultValue: Option[String] = None
)

/** Well-known metadata property names. */
object MetadataProperty {
// Name of the entry that `MetadataValidation.validateClosureMetadata` reads the closure status from.
val closureType = "ClosureType"
val descriptiveType = "DescriptiveType"
}

/** Error codes returned by the validators; each constant's value is its own name as a string. */
object ErrorCode {
// Closure status errors.
val CLOSURE_STATUS_IS_MISSING = "CLOSURE_STATUS_IS_MISSING"
val CLOSURE_METADATA_EXISTS_WHEN_FILE_IS_OPEN = "CLOSURE_METADATA_EXISTS_WHEN_FILE_IS_OPEN"
// General value errors.
val NUMBER_ONLY_ERROR = "NUMBER_ONLY_ERROR"
val NEGATIVE_NUMBER_ERROR = "NEGATIVE_NUMBER_ERROR"
val EMPTY_VALUE_ERROR = "EMPTY_VALUE_ERROR"
val NO_OPTION_SELECTED_ERROR = "NO_OPTION_SELECTED_ERROR"
val INVALID_DATE_FORMAT_ERROR = "INVALID_DATE_FORMAT_ERROR"
val INVALID_NUMBER_ERROR = "INVALID_NUMBER_ERROR"
// Errors for the day part of a date.
val EMPTY_VALUE_ERROR_FOR_DAY = "EMPTY_VALUE_ERROR_FOR_DAY"
val NUMBER_ERROR_FOR_DAY = "NUMBER_ERROR_FOR_DAY"
val NEGATIVE_NUMBER_ERROR_FOR_DAY = "NEGATIVE_NUMBER_ERROR_FOR_DAY"
val INVALID_NUMBER_ERROR_FOR_DAY = "INVALID_NUMBER_ERROR_FOR_DAY"
// Errors for the month part of a date.
val EMPTY_VALUE_ERROR_FOR_MONTH = "EMPTY_VALUE_ERROR_FOR_MONTH"
val NUMBER_ERROR_FOR_MONTH = "NUMBER_ERROR_FOR_MONTH"
val NEGATIVE_NUMBER_ERROR_FOR_MONTH = "NEGATIVE_NUMBER_ERROR_FOR_MONTH"
val INVALID_NUMBER_ERROR_FOR_MONTH = "INVALID_NUMBER_ERROR_FOR_MONTH"
// Errors for the year part of a date.
val EMPTY_VALUE_ERROR_FOR_YEAR = "EMPTY_VALUE_ERROR_FOR_YEAR"
val NUMBER_ERROR_FOR_YEAR = "NUMBER_ERROR_FOR_YEAR"
val NEGATIVE_NUMBER_ERROR_FOR_YEAR = "NEGATIVE_NUMBER_ERROR_FOR_YEAR"
val INVALID_NUMBER_ERROR_FOR_YEAR = "INVALID_NUMBER_ERROR_FOR_YEAR"
// Whole-date errors.
val INVALID_DAY_FOR_MONTH_ERROR = "INVALID_DAY_FOR_MONTH_ERROR"
val FUTURE_DATE_ERROR = "FUTURE_DATE_ERROR"
// Defined-value and dependency errors.
val MULTI_VALUE_ERROR = "MULTI_VALUE_ERROR"
val UNDEFINED_VALUE_ERROR = "UNDEFINED_VALUE_ERROR"
val REQUIRED_PROPERTY_IS_EMPTY = "REQUIRED_PROPERTY_IS_EMPTY"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package uk.gov.nationalarchives.tdr.validation

import org.apache.commons.lang3.BooleanUtils
import uk.gov.nationalarchives.tdr.validation.ErrorCode._
import uk.gov.nationalarchives.tdr.validation.MetadataProperty._

/** All metadata entries supplied for one file, identified by `fileName`. */
case class FileRow(fileName: String, metadata: List[Metadata])
/** A validation failure: the property it concerns and an error code from `ErrorCode`.
  * Within this package the name shadows `java.lang.Error`.
  */
case class Error(propertyName: String, errorCode: String)

/** Validates file metadata against closure and descriptive criteria.
  *
  * @param closureMetadataCriteria
  *   criteria for the closure-status property; the criteria under its `"Closed"` dependency key describe the closure fields
  * @param descriptiveMetadataCriteria
  *   criteria for the descriptive properties
  */
class MetadataValidation(closureMetadataCriteria: MetadataCriteria, descriptiveMetadataCriteria: List[MetadataCriteria]) {

  /** Validates every row and returns the errors keyed by file name.
    *
    * Rows sharing a file name collapse to a single map entry (the later row wins).
    */
  def validateMetadata(fileRows: List[FileRow]): Map[String, List[Error]] =
    fileRows.map(row => row.fileName -> (validateClosureMetadata(row.metadata) ++ validateDescriptiveMetadata(row.metadata))).toMap

  /** Validates the closure status and, depending on it, the closure fields.
    *
    *   - `Open`: an error if any closure field holds a non-default value.
    *   - `Closed`: the closure fields are validated against the `"Closed"` criteria.
    *   - any other value: `UNDEFINED_VALUE_ERROR`.
    *   - no closure-status entry: `CLOSURE_STATUS_IS_MISSING`.
    */
  def validateClosureMetadata(input: List[Metadata]): List[Error] = {
    val closedCriteria = closureMetadataCriteria.dependencies.flatMap(_.get("Closed"))

    input.find(_.name == closureType) match {
      case Some(Metadata(_, "Open")) =>
        if (hasClosureMetadata(input, closedCriteria)) List(Error(closureType, CLOSURE_METADATA_EXISTS_WHEN_FILE_IS_OPEN))
        else List.empty
      case Some(Metadata(_, "Closed")) => validateMetadata(input, closedCriteria.getOrElse(Nil))
      case Some(Metadata(_, _))        => List(Error(closureType, UNDEFINED_VALUE_ERROR))
      case None                        => List(Error(closureType, CLOSURE_STATUS_IS_MISSING))
    }
  }

  /** True when any of the given criteria has an input entry whose value differs from its default
    * (a missing default counts as the empty string). The criteria listed under each criterion's
    * default-value dependency key are checked recursively.
    */
  def hasClosureMetadata(input: List[Metadata], metadataCriteria: Option[List[MetadataCriteria]]): Boolean =
    metadataCriteria.exists(
      _.exists { criteria =>
        val default = criteria.defaultValue.getOrElse("")
        input.find(_.name == criteria.name).exists(_.value != default) ||
        hasClosureMetadata(input, criteria.dependencies.flatMap(_.get(default)))
      }
    )

  /** Validates the input entries against the descriptive criteria. */
  def validateDescriptiveMetadata(input: List[Metadata]): List[Error] = validateMetadata(input, descriptiveMetadataCriteria)

  /** Validates each input entry that has matching criteria; entries with no criteria are ignored. */
  private def validateMetadata(input: List[Metadata], metadataCriteria: List[MetadataCriteria]): List[Error] =
    input.flatMap { metadata =>
      metadataCriteria
        .find(_.name == metadata.name)
        .flatMap(criteria => errorCodeFor(metadata.value, criteria, input).map(Error(criteria.name, _)))
    }

  /** Dispatches to the checker for the criteria's data type. */
  private def errorCodeFor(value: String, criteria: MetadataCriteria, input: List[Metadata]): Option[String] =
    criteria.dataType match {
      // Decimal values are currently checked with the integer rules.
      case Integer | Decimal => Integer.checkValue(value, criteria)
      case Boolean =>
        val requiredMetadata = criteria.requiredProperty.flatMap(property => input.find(_.name == property))
        Boolean.checkValue(value, criteria, requiredMetadata) match {
          // The value itself is fine: now validate the properties that depend on it.
          case None if value.nonEmpty => firstDependencyError(value, criteria, input)
          case result                 => result
        }
      case Text     => Text.checkValue(value, criteria)
      case DateTime => DateTime.checkValue(value, criteria)
      case _        => None
    }

  /** Validates the dependent properties for the first dependency key that reads as the same boolean as `value`.
    *
    * Dependents are validated as required. Returns the first error code found, or `None` when there
    * are no dependencies, no key matches, or all dependents are valid.
    */
  private def firstDependencyError(value: String, criteria: MetadataCriteria, input: List[Metadata]): Option[String] =
    criteria.dependencies.flatMap { dependencies =>
      // collectFirst instead of collect(...).head: `.head` threw NoSuchElementException when no
      // dependency key matched the value (e.g. only "true" is declared and the value is "false").
      dependencies.collectFirst {
        case (definedValue, dependentCriteria) if BooleanUtils.toBoolean(definedValue) == BooleanUtils.toBoolean(value) =>
          val dependentInput = input.filter(entry => dependentCriteria.exists(_.name == entry.name))
          validateMetadata(dependentInput, dependentCriteria.map(_.copy(required = true))).map(_.errorCode).headOption
      }.flatten
    }
}
Loading

0 comments on commit ac72f79

Please sign in to comment.