Skip to content

Commit

Permalink
update regexes
Browse files Browse the repository at this point in the history
  • Loading branch information
NikhilShahi committed Sep 26, 2022
1 parent a6cf7a4 commit eed141b
Showing 1 changed file with 58 additions and 56 deletions.
114 changes: 58 additions & 56 deletions backend/src/services/scanner/regexp.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// Lookaround anchors that the patterns below interpolate around each alternative.
// boundaryPrefix: the match must not be preceded by a non-whitespace character.
// boundarySuffix: the match must not be followed by a non-whitespace character.
// Together they require a candidate to sit between whitespace and/or the
// start/end of the input, which is stricter than a \b word boundary.
const boundaryPrefix = String.raw`(?<!\S)`
const boundarySuffix = String.raw`(?!\S)`
/*** Patterns ***/

// Address patterns
Expand Down Expand Up @@ -44,87 +46,87 @@ const addressPattern = String.raw`(?:${streetPattern}\s${cityStateZipPattern})|(

// Driver License patterns
// Maps a two-letter US state code to the regex source for that state's
// driver license number format. Values are regex source strings, not RegExp
// objects; alternatives within one state are joined with "|".
// NOTE(review): every key appears twice in this listing (diff view). The first
// group anchors with \b, the second with boundaryPrefix/boundarySuffix. In a
// JavaScript object literal a repeated key keeps the last value, so the second
// group is the effective one; TypeScript reports duplicate keys as an error.
const dlPatternMap: Record<string, string> = {
// Earlier form: each alternative wrapped in \b word boundaries.
AL: String.raw`\b\d{7}\b`,
AZ: String.raw`\b[a-zA-Z][0-9]{8}\b|\b[0-9]{9}\b`,
AR: String.raw`\b9[0-9]{8}\b`,
CA: String.raw`\b[a-zA-Z][0-9]{7}\b`,
CO: String.raw`\b[0-9]{2}-[0-9]{3}-[0-9]{4}\b`,
FL: String.raw`\b[a-zA-Z][0-9]{12}\b|\b[a-zA-Z][0-9]{3}-[0-9]{3}-[0-9]{2}-[0-9]{3}-[0-9]\b|\b[a-zA-Z]-[0-9]{3}-[0-9]{3}-[0-9]{3}-[0-9]{3}\b`,
ID: String.raw`\b[a-zA-Z]{2}[0-9]{6}[a-zA-Z]\b`,
IL: String.raw`\b[a-zA-Z][0-9]{11}\b|\b[a-zA-Z][0-9]{3}-[0-9]{4}-[0-9]{4}\b`,
IN: String.raw`\b[0-9]{4}-[0-9]{2}-[0-9]{4}\b`,
IA: String.raw`\b[0-9]{3}[a-zA-Z]{2}[0-9]{4}\b`,
KS: String.raw`\b[a-zA-Z][0-9]{2}-[0-9]{2}-[0-9]{4}\b`,
KY: String.raw`\b[a-zA-Z][0-9]{2}-[0-9]{3}-[0-9]{3}\b`,
MA: String.raw`\bS[0-9]{8}\b|\bSA[0-9]{7}\b`,
MI: String.raw`\b[a-zA-Z][ ][0-9]{3}[ ][0-9]{3}[ ][0-9]{3}[ ][0-9]{3}\b`,
MN: String.raw`\b[a-zA-Z][0-9]{3}-[0-9]{3}-[0-9]{3}-[0-9]{3}\b`,
MS: String.raw`\b[0-9]{3}-[0-9]{2}-[0-9]{4}\b`,
MT: String.raw`\b(?:(?:[0][1-9]|[1][0-2])[0-9]{3}(?:[1-9][0-9]{3})41(?:[0][1-9]|[1][0-9]|[2][0-9]|[3][0-1]))\b`,
NV: String.raw`\b[0-9]{10}\b|\b[0-9]{12}\b`,
NH: String.raw`\b(?:[0][1-9]|[1][0-2])[a-zA-Z]{3}[0-9]{2}(?:0[1-9]|[1-2][0-9]|3[0-1])[0-9]\b`,
NJ: String.raw`\b[a-zA-Z][0-9]{4} [0-9]{5} [0-9]{5}\b|\b[a-zA-Z][0-9]{14}\b`,
NY: String.raw`\b[0-9]{3} [0-9]{3} [0-9]{3}\b`,
ND: String.raw`\b[a-zA-Z]{3}-[0-9]{2}-[0-9]{4}\b|\b[a-zA-Z][0-9]{9}\b`,
OH: String.raw`\b[a-zA-Z]{2}[0-9]{6}\b`,
PA: String.raw`\b[0-9]{2} [0-9]{3} [0-9]{3}\b`,
RI: String.raw`\b[1-9]{2}[0-9]{5}\b`,
SD: String.raw`\b[0-9]{8}\b`,
VT: String.raw`\b[0-9]{7}[a-zA-Z]\b`,
WA: String.raw`\b[a-zA-Z\*]{5}[a-zA-Z]{2}[0-9]{3}[a-zA-Z0-9]{2}\b|\bWDL[a-zA-Z0-9]{9}\b`,
WV: String.raw`\b[a-zA-Z][0-9]{6}\b`,
WI: String.raw`\b[a-zA-Z][0-9]{3}-[0-9]{4}-[0-9]{4}-[0-9]{2}\b|\b[a-zA-Z][0-9]{13}\b`,
WY: String.raw`\b[0-9]{6}-[0-9]{3}\b`,
// Current form: same bodies, each alternative wrapped in
// boundaryPrefix/boundarySuffix so a number only matches when it is delimited
// by whitespace or the start/end of the input.
AL: String.raw`${boundaryPrefix}\d{7}${boundarySuffix}`,
AZ: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{8}${boundarySuffix}|${boundaryPrefix}[0-9]{9}${boundarySuffix}`,
AR: String.raw`${boundaryPrefix}9[0-9]{8}${boundarySuffix}`,
CA: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{7}${boundarySuffix}`,
CO: String.raw`${boundaryPrefix}[0-9]{2}-[0-9]{3}-[0-9]{4}${boundarySuffix}`,
FL: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{12}${boundarySuffix}|${boundaryPrefix}[a-zA-Z][0-9]{3}-[0-9]{3}-[0-9]{2}-[0-9]{3}-[0-9]${boundarySuffix}|${boundaryPrefix}[a-zA-Z]-[0-9]{3}-[0-9]{3}-[0-9]{3}-[0-9]{3}${boundarySuffix}`,
ID: String.raw`${boundaryPrefix}[a-zA-Z]{2}[0-9]{6}[a-zA-Z]${boundarySuffix}`,
IL: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{11}${boundarySuffix}|${boundaryPrefix}[a-zA-Z][0-9]{3}-[0-9]{4}-[0-9]{4}${boundarySuffix}`,
IN: String.raw`${boundaryPrefix}[0-9]{4}-[0-9]{2}-[0-9]{4}${boundarySuffix}`,
IA: String.raw`${boundaryPrefix}[0-9]{3}[a-zA-Z]{2}[0-9]{4}${boundarySuffix}`,
KS: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{2}-[0-9]{2}-[0-9]{4}${boundarySuffix}`,
KY: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{2}-[0-9]{3}-[0-9]{3}${boundarySuffix}`,
MA: String.raw`${boundaryPrefix}S[0-9]{8}${boundarySuffix}|${boundaryPrefix}SA[0-9]{7}${boundarySuffix}`,
MI: String.raw`${boundaryPrefix}[a-zA-Z][ ][0-9]{3}[ ][0-9]{3}[ ][0-9]{3}[ ][0-9]{3}${boundarySuffix}`,
MN: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{3}-[0-9]{3}-[0-9]{3}-[0-9]{3}${boundarySuffix}`,
MS: String.raw`${boundaryPrefix}[0-9]{3}-[0-9]{2}-[0-9]{4}${boundarySuffix}`,
// MT: two digits 01-12, three digits, four digits not starting with 0, the literal 41, then two digits 01-31.
MT: String.raw`${boundaryPrefix}(?:(?:[0][1-9]|[1][0-2])[0-9]{3}(?:[1-9][0-9]{3})41(?:[0][1-9]|[1][0-9]|[2][0-9]|[3][0-1]))${boundarySuffix}`,
NV: String.raw`${boundaryPrefix}[0-9]{10}${boundarySuffix}|${boundaryPrefix}[0-9]{12}${boundarySuffix}`,
// NH: two digits 01-12, three letters, two digits, two digits 01-31, one digit.
NH: String.raw`${boundaryPrefix}(?:[0][1-9]|[1][0-2])[a-zA-Z]{3}[0-9]{2}(?:0[1-9]|[1-2][0-9]|3[0-1])[0-9]${boundarySuffix}`,
NJ: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{4} [0-9]{5} [0-9]{5}${boundarySuffix}|${boundaryPrefix}[a-zA-Z][0-9]{14}${boundarySuffix}`,
NY: String.raw`${boundaryPrefix}[0-9]{3} [0-9]{3} [0-9]{3}${boundarySuffix}`,
ND: String.raw`${boundaryPrefix}[a-zA-Z]{3}-[0-9]{2}-[0-9]{4}${boundarySuffix}|${boundaryPrefix}[a-zA-Z][0-9]{9}${boundarySuffix}`,
OH: String.raw`${boundaryPrefix}[a-zA-Z]{2}[0-9]{6}${boundarySuffix}`,
PA: String.raw`${boundaryPrefix}[0-9]{2} [0-9]{3} [0-9]{3}${boundarySuffix}`,
RI: String.raw`${boundaryPrefix}[1-9]{2}[0-9]{5}${boundarySuffix}`,
SD: String.raw`${boundaryPrefix}[0-9]{8}${boundarySuffix}`,
VT: String.raw`${boundaryPrefix}[0-9]{7}[a-zA-Z]${boundarySuffix}`,
// WA: the first five positions accept letters or a literal asterisk.
WA: String.raw`${boundaryPrefix}[a-zA-Z\*]{5}[a-zA-Z]{2}[0-9]{3}[a-zA-Z0-9]{2}${boundarySuffix}|${boundaryPrefix}WDL[a-zA-Z0-9]{9}${boundarySuffix}`,
WV: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{6}${boundarySuffix}`,
WI: String.raw`${boundaryPrefix}[a-zA-Z][0-9]{3}-[0-9]{4}-[0-9]{4}-[0-9]{2}${boundarySuffix}|${boundaryPrefix}[a-zA-Z][0-9]{13}${boundarySuffix}`,
WY: String.raw`${boundaryPrefix}[0-9]{6}-[0-9]{3}${boundarySuffix}`,
}
// One alternation of every state pattern, joined with "|" in the map's key
// order. The result no longer records which state a match belongs to.
const driverLicensePattern = Object.values(dlPatternMap).join("|")

// Geographic Coordinate patterns
// Latitude: optional sign, then either 0-89 with an optional decimal part or
// exactly 90 with an optional all-zero decimal part.
const latitudePattern = String.raw`[-+]?(?:[1-8]?\d(?:\.\d+)?|90(?:\.0+)?)`
// Longitude: optional sign, then either exactly 180 with an optional all-zero
// decimal part or 0-179 with an optional decimal part.
const longitudePattern = String.raw`[-+]?(?:180(?:\.0+)?|(?:(?:1[0-7]\d)|(?:[1-9]?\d))(?:\.\d+)?)`
// A latitude and a longitude separated by a comma, with optional whitespace on
// either side of the comma.
// Earlier form, anchored with \b word boundaries:
const coordinatePattern = String.raw`\b${latitudePattern}\s*,\s*${longitudePattern}\b`
// Current form, anchored with boundaryPrefix/boundarySuffix (whitespace or
// start/end of input on both sides):
const coordinatePattern = String.raw`${boundaryPrefix}${latitudePattern}\s*,\s*${longitudePattern}${boundarySuffix}`

// Birthday patterns (min: 1900, max: 2099)
// Date patterns in three field orders. Shared pieces:
//   year  - 19\d{2} or 20\d{2}, i.e. 1900-2099 (the 20[01][0-9] alternative is
//           already covered by 20\d{2})
//   month - 01-12, two digits
//   day   - 01-31, two digits, not checked against the month
// Each separator is independently one of "-", "/" or ".", so mixed separators
// such as 2020-01/02 are accepted.
// Earlier forms, anchored with \b word boundaries:
const yyyyMmDd = String.raw`\b(?:19\d{2}|20[01][0-9]|20\d{2})[-/.](?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])\b`
const mmDdYyyy = String.raw`\b(?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])[-/.](?:19\d{2}|20[01][0-9]|20\d{2})\b`
const ddMmYyyy = String.raw`\b(?:0[1-9]|[12][0-9]|3[01])[-/.](?:0[1-9]|1[012])[-/.](?:19\d{2}|20[01][0-9]|20\d{2})\b`
// Current forms, anchored with boundaryPrefix/boundarySuffix:
const yyyyMmDd = String.raw`${boundaryPrefix}(?:19\d{2}|20[01][0-9]|20\d{2})[-/.](?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])${boundarySuffix}`
const mmDdYyyy = String.raw`${boundaryPrefix}(?:0[1-9]|1[012])[-/.](?:0[1-9]|[12][0-9]|3[01])[-/.](?:19\d{2}|20[01][0-9]|20\d{2})${boundarySuffix}`
const ddMmYyyy = String.raw`${boundaryPrefix}(?:0[1-9]|[12][0-9]|3[01])[-/.](?:0[1-9]|1[012])[-/.](?:19\d{2}|20[01][0-9]|20\d{2})${boundarySuffix}`
// Any of the three orders. A value such as 01/02/2000 satisfies both the
// month-first and the day-first alternative; the alternation does not
// distinguish them.
const dobPattern = String.raw`${yyyyMmDd}|${mmDdYyyy}|${ddMmYyyy}`

// IP address patterns
// Dotted-quad IPv4 address: four octets separated by literal dots. Each octet
// is 250-255, 200-249, or one to three digits with an optional leading 0 or 1,
// so values 0-255 are accepted and zero-padded octets such as 001 also match.
// Earlier form, anchored with \b word boundaries:
const ipV4Pattern = String.raw`\b(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b`
// Current form, anchored with boundaryPrefix/boundarySuffix:
const ipV4Pattern = String.raw`${boundaryPrefix}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)${boundarySuffix}`

// Credit card patterns
// Card number shapes per issuer. These check prefix and length only; no
// checksum is applied by the patterns themselves.
//   American Express - starts 34 or 37, 15 digits; contiguous, or grouped
//                      4-6-5 with hyphens or with single spaces
//   Visa             - starts 4, 13 or 16 digits contiguous, or 16 digits
//                      grouped 4-4-4-4 with hyphens or with single spaces
//   Mastercard       - 51-55 followed by 14 digits, or a 2221-2720 prefix
//                      followed by 12 digits; contiguous only
//   Discover         - starts 6 (6011, 65, 644-649, or 622126-622925) with a
//                      variable-length digit tail; contiguous only
//   JCB              - 2131 or 1800 followed by 11 digits, or 35 plus three
//                      digits followed by 11 digits; contiguous only
// Earlier forms, anchored with \b word boundaries:
const americanExpressPattern = String.raw`\b3[47][0-9]{13}\b|\b3[47][0-9]{2}-[0-9]{6}-[0-9]{5}\b|\b3[47][0-9]{2}[ ][0-9]{6}[ ][0-9]{5}\b`
const visaPattern = String.raw`\b4[0-9]{12}(?:[0-9]{3})?\b|\b4[0-9]{3}-[0-9]{4}-[0-9]{4}-[0-9]{4}\b|\b4[0-9]{3}[ ][0-9]{4}[ ][0-9]{4}[ ][0-9]{4}\b`
const mastercardPattern = String.raw`\b5[1-5][0-9]{14}\b|\b(?:222[1-9]|22[3-9]\d|2[3-6]\d{2}|27[0-1]\d|2720)[0-9]{12}\b`
const discoverPattern = String.raw`\b6(?:011\d{12,15}|5\d{14,17}|4[4-9]\d{13,16}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d{2}|9(?:[01]\d|2[0-5]))\d{10,13})\b`
const jcbPattern = String.raw`\b(?:2131|1800|35[0-9]{3})[0-9]{11}\b`
// Current forms: every alternative is wrapped in boundaryPrefix/boundarySuffix.
const americanExpressPattern = String.raw`${boundaryPrefix}3[47][0-9]{13}${boundarySuffix}|${boundaryPrefix}3[47][0-9]{2}-[0-9]{6}-[0-9]{5}${boundarySuffix}|${boundaryPrefix}3[47][0-9]{2}[ ][0-9]{6}[ ][0-9]{5}${boundarySuffix}`
const visaPattern = String.raw`${boundaryPrefix}4[0-9]{12}(?:[0-9]{3})?${boundarySuffix}|${boundaryPrefix}4[0-9]{3}-[0-9]{4}-[0-9]{4}-[0-9]{4}${boundarySuffix}|${boundaryPrefix}4[0-9]{3}[ ][0-9]{4}[ ][0-9]{4}[ ][0-9]{4}${boundarySuffix}`
const mastercardPattern = String.raw`${boundaryPrefix}5[1-5][0-9]{14}${boundarySuffix}|${boundaryPrefix}(?:222[1-9]|22[3-9]\d|2[3-6]\d{2}|27[0-1]\d|2720)[0-9]{12}${boundarySuffix}`
const discoverPattern = String.raw`${boundaryPrefix}6(?:011\d{12,15}|5\d{14,17}|4[4-9]\d{13,16}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d{2}|9(?:[01]\d|2[0-5]))\d{10,13})${boundarySuffix}`
const jcbPattern = String.raw`${boundaryPrefix}(?:2131|1800|35[0-9]{3})[0-9]{11}${boundarySuffix}`
// Union of all issuer patterns.
const creditCardPattern = String.raw`${americanExpressPattern}|${visaPattern}|${mastercardPattern}|${discoverPattern}|${jcbPattern}`

// Email patterns
// Email address pattern: a local part (dot-separated runs of allowed
// characters, or a double-quoted string), an @ sign, then either a dotted
// domain name or a bracketed address literal.
// The source is assembled by concatenation because the allowed-character class
// contains a backtick, which cannot be written inside a String.raw template;
// each backtick is supplied as a separate one-character string.
// The unquoted local part and the domain labels list only lowercase a-z, so
// uppercase letters match only if the RegExp is compiled case-insensitively.
// In this listing (diff view) the first and last segments each appear twice:
// once without anchors, once with boundaryPrefix / boundarySuffix added.
const emailPattern =
String.raw`(?:[a-z0-9!#$%&'*+/=?^_` +
String.raw`${boundaryPrefix}(?:[a-z0-9!#$%&'*+/=?^_` +
"`" +
String.raw`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_` +
"`" +
String.raw`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])`
String.raw`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])${boundarySuffix}`

// SSN patterns
// Nine digits in a 3-2-4 layout, with an optional single hyphen or space
// between the groups (the two separators are independent of each other).
// Negative lookaheads reject a first group of 000, 666, 333 or anything
// starting with 9, a middle group of 00, and a last group of 0000.
// Earlier form, anchored with \b word boundaries:
const ssnPattern = String.raw`\b(?!000|666|333|9[0-9]{2})[0-9]{3}[- ]?(?!00)[0-9]{2}[- ]?(?!0000)[0-9]{4}\b`
// Current form, anchored with boundaryPrefix/boundarySuffix:
const ssnPattern = String.raw`${boundaryPrefix}(?!000|666|333|9[0-9]{2})[0-9]{3}[- ]?(?!00)[0-9]{2}[- ]?(?!0000)[0-9]{4}${boundarySuffix}`

// Phone number patterns
// Two alternative phone number shapes:
//   1. optional country code (optional +, one to three digits), optional
//      three-digit area code with optional parentheses, then three digits and
//      four digits; each gap may hold one hyphen, dot, whitespace character or
//      asterisk, or nothing
//   2. a two-digit country code (optional +, optionally parenthesised) followed
//      by groups of two, three and four digits separated by optional whitespace
// Both alternatives carry their own lookarounds that reject a digit or hyphen
// immediately before or after the number.
// Earlier form, additionally wrapped in \b word boundaries:
const phoneNumberPattern = String.raw`\b(?:(?:(?<![\d-])(?:\+?\d{1,3}[-.\s*]?)?(?:\(?\d{3}\)?[-.\s*]?)?\d{3}[-.\s*]?\d{4}(?![\d-]))|(?:(?<![\d-])(?:(?:\(\+?\d{2}\))|(?:\+?\d{2}))\s*\d{2}\s*\d{3}\s*\d{4}(?![\d-])))\b`
// Current form, wrapped in boundaryPrefix/boundarySuffix instead. These outer
// anchors are stricter than the inner digit/hyphen lookarounds, which remain
// in place.
const phoneNumberPattern = String.raw`${boundaryPrefix}(?:(?:(?<![\d-])(?:\+?\d{1,3}[-.\s*]?)?(?:\(?\d{3}\)?[-.\s*]?)?\d{3}[-.\s*]?\d{4}(?![\d-]))|(?:(?<![\d-])(?:(?:\(\+?\d{2}\))|(?:\+?\d{2}))\s*\d{2}\s*\d{3}\s*\d{4}(?![\d-])))${boundarySuffix}`

// Vehicle identification patterns
// Exactly 17 characters drawn from digits and uppercase letters, excluding the
// letters I, O and Q. Lowercase input does not match unless the RegExp is
// compiled case-insensitively.
// Earlier form, anchored with \b word boundaries:
const vinPattern = String.raw`\b[A-HJ-NPR-Z0-9]{17}\b`
// Current form, anchored with boundaryPrefix/boundarySuffix:
const vinPattern = String.raw`${boundaryPrefix}[A-HJ-NPR-Z0-9]{17}${boundarySuffix}`

/*** Compiled Regexp ***/
// Earlier exports: compiled with the global flag. A global RegExp keeps its
// lastIndex between test/exec calls, so a shared instance can start its next
// search part-way through a different input.
export const ADDRESS_REGEXP = new RegExp(addressPattern, "g")
export const DRIVER_LICENSE_REGEXP = new RegExp(driverLicensePattern, "g")
export const COORDINATE_REGEXP = new RegExp(coordinatePattern, "g")
export const DOB_REGEXP = new RegExp(dobPattern, "g")
export const IP_ADDRESS_REGEXP = new RegExp(ipV4Pattern, "g")
export const CREDIT_CARD_REGEXP = new RegExp(creditCardPattern, "g")
export const EMAIL_REGEXP = new RegExp(emailPattern, "g")
export const SSN_REGEXP = new RegExp(ssnPattern, "g")
export const PHONE_NUMBER_REGEXP = new RegExp(phoneNumberPattern, "g")
export const VIN_REGEXP = new RegExp(vinPattern, "g")
// Current exports: compiled without the global flag, so every test/exec call
// searches from index 0 and the shared instances carry no state between calls.
// Callers that need all matches must add the flag themselves, for example
// new RegExp(X.source, X.flags + "g").
export const ADDRESS_REGEXP = new RegExp(addressPattern)
export const DRIVER_LICENSE_REGEXP = new RegExp(driverLicensePattern)
export const COORDINATE_REGEXP = new RegExp(coordinatePattern)
export const DOB_REGEXP = new RegExp(dobPattern)
export const IP_ADDRESS_REGEXP = new RegExp(ipV4Pattern)
export const CREDIT_CARD_REGEXP = new RegExp(creditCardPattern)
// emailPattern lists only lowercase a-z in its local-part and domain classes
// and is anchored at a whitespace boundary, so without case-insensitive
// matching an address containing uppercase letters (for example
// John@Example.com) is not detected at all. The "i" flag restores that.
export const EMAIL_REGEXP = new RegExp(emailPattern, "i")
export const SSN_REGEXP = new RegExp(ssnPattern)
export const PHONE_NUMBER_REGEXP = new RegExp(phoneNumberPattern)
export const VIN_REGEXP = new RegExp(vinPattern)

0 comments on commit eed141b

Please sign in to comment.