Skip to content

Commit

Permalink
feat #22: WIP on escape sequence handling, not as easy as I'd hoped b…
Browse files Browse the repository at this point in the history
…ecause of HL7's full on spec. This will prob come in stages.
  • Loading branch information
wokket committed Aug 14, 2021
1 parent e258a00 commit 27efef2
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 26 deletions.
42 changes: 38 additions & 4 deletions benches/decoder.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use criterion::{criterion_group, criterion_main, Criterion};
use rusthl7::{decoder::*, separators::Separators};

// Note that we're calling decode on a whole message here, although it would normally be on an individual field...
// this is just to make it work a bit harder on a larger dataset, not because it makes sense in an HL7 sense

fn no_sequences(c: &mut Criterion) {
c.bench_function("No Escape Sequences", |b| {

let delims = Separators::default();
let decoder = EscapeSequence::new(delims);

Expand All @@ -13,15 +15,47 @@ fn no_sequences(c: &mut Criterion) {
});
}

/// Benchmarks `EscapeSequence::decode` on a sample message that contains the
/// escape character (`\`) but no `\F\` sequence.
fn no_sequences_but_backslash(c: &mut Criterion) {
    c.bench_function("No Escape Sequences But Backslash", |b| {
        // Build the decoder once, outside the measured closure.
        let delims = Separators::default();
        let decoder = EscapeSequence::new(delims);

        // Return the decoded value instead of discarding it with `let _ = ...`:
        // Criterion passes the routine's return value through `black_box`, which
        // stops the optimiser from eliding the work being measured.
        b.iter(|| decoder.decode(get_sample_message_with_backslash()))
    });
}

/// Benchmarks `EscapeSequence::decode` on a sample message that contains a
/// `\F\` escape sequence, so the replacement path is exercised.
fn has_escape_sequences(c: &mut Criterion) {
    c.bench_function("Has Escape Sequences", |b| {
        // Build the decoder once, outside the measured closure.
        let delims = Separators::default();
        let decoder = EscapeSequence::new(delims);

        // Return the decoded value instead of discarding it with `let _ = ...`:
        // Criterion passes the routine's return value through `black_box`, which
        // stops the optimiser from eliding the work being measured.
        b.iter(|| decoder.decode(get_sample_message_with_escape_sequences()))
    });
}

fn get_sample_message_no_sequence() -> &'static str {
"MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F"
// note we've stripped the backslash from the MSH
"MSH|^~*&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F"
}

/// Sample message (segments separated by `\r`) that contains the escape
/// character but no `\F\` sequence: besides the backslash in the MSH encoding
/// characters there is a lone backslash near the end of the message, in the OBX
/// segment just before `70_105`.
fn get_sample_message_with_backslash() -> &'static str {
    // One literal per segment; `concat!` joins them into a single `&'static str`.
    concat!(
        "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\r",
        "PID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\r",
        "OBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\r",
        "OBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F"
    )
}

/// Sample message (segments separated by `\r`) that contains a real escape
/// sequence: a `\F\` at the start of the fourth PID field, in addition to the
/// backslash in the MSH encoding characters and the lone backslash before
/// `70_105` in the OBX segment.
fn get_sample_message_with_escape_sequences() -> &'static str {
    // One literal per segment; `concat!` joins them into a single `&'static str`.
    concat!(
        "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\r",
        "PID|||\\F\\555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\r",
        "OBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\r",
        "OBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F"
    )
}

criterion_group!(
decoder,
no_sequences
no_sequences,
no_sequences_but_backslash,
has_escape_sequences
);
criterion_main!(decoder);
criterion_main!(decoder);
110 changes: 88 additions & 22 deletions src/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,71 +3,137 @@
For more info see [here](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) or [here](https://confluence.hl7australia.com/display/OOADRM20181/Appendix+1+Parsing+HL7v2#Appendix1ParsingHL7v2-Dealingwithreservedcharactersanddelimiters)
## Details
This decoder will replace some, ** but not all ** of the standard HL7 escape sequences. Specifically, the following sequences are **NOT** replaced:
- `\H\` - Indicates the start of highlighted text, this is a consuming application problem and will not be replaced
- `\N\` - Indicates the end of highlighted text and resumption of normal text. This is a consuming application problem and will not be replaced
- `\Z...\` - Custom application escape sequences, these are custom (as are most `Z` items in HL7) and will not be replaced
*/

use std::borrow::Cow;
use regex::{Match, Regex};
use crate::separators::Separators;

use regex::Regex;
use std::borrow::Cow;

pub struct EscapeSequence {
delims: Separators,
regex: Regex
escape_char_regex: Regex,
field_regex: Regex,
}

impl<'a> EscapeSequence {

pub fn new(delims: Separators) -> EscapeSequence {
EscapeSequence {
let return_val = EscapeSequence {
delims,
regex: Regex::new("[/]").unwrap()
}
}
escape_char_regex: Regex::new(r#"\\"#).unwrap(),
field_regex: Regex::new(r#"\\F\\"#).unwrap(),
};

return_val
}

pub fn decode<S>(&self, input: S) -> Cow<'a, str>
where S: Into<Cow<'a, str>>
pub fn decode<S>(&self, input: S) -> Cow<'a, str>
where
S: Into<Cow<'a, str>>,
{

let input = input.into();
let first = self.regex.find(&input); // find the first escape sequence
//let first = input.find(self.delims.escape_char);
let first = self.escape_char_regex.find(&input);

if first.is_some() {
input.into()
} else { // no escape char in the string at all, just return what we have
input.into()
// We know there's a backslash, so we need to process stuff
// I wanted to use regex as a simple(ish) if slow(ish) way to get started with this, but the requirement for a dynamic escaping char (typically `\`)
// which may/may not need doubling up for regex interpretation reasons is making that harder...

let output = self
.field_regex
.replace_all(&input, self.delims.field.to_string());

Cow::Owned(output.into())

/*
// TODO: Awesome forwards-only string manip work... gotta start simple
let first = first.unwrap().start();
let mut output: Vec<u8> = Vec::from(input[0..first].as_bytes());
output.reserve(input.len() - first);
let rest = input[first+1..].bytes(); // the +1 skips the initial backslash that got us here
let mut iter = rest.into_iter();
while let Some(c) = iter.next() {
match c {
b'F' => {
output.extend_from_slice(&self.field_bytes);
iter.next(); // eat the next slash //TODO: How do we know that was a slash?
}
_ => output.push(c),
}
}
Cow::Owned(String::from_utf8(output).unwrap())
*/
} else {
// no escape char in the string at all, just return what we have
input
}

}
}


#[cfg(test)]
mod tests {
use std::str::FromStr;

use super::*;

#[test]
fn test_decode_does_nothing_if_not_required() {
    // The text holds a forward slash but no backslash, so decoding must hand
    // it back unchanged.
    let text = "There are no escape sequences here/there.";
    let decoder = EscapeSequence::new(Separators::default());

    let decoded = decoder.decode(text);

    assert_eq!(decoded, text);
}

#[test]
fn test_decode_handles_field_sequence() {
fn test_decode_does_nothing_if_backslash_is_not_escape_sequence() {
let delims = Separators::default();
let escaper = EscapeSequence::new(delims);

let input = r#"There are no escape sequences here\there."#;
let output = escaper.decode(input);
assert_eq!(output, input);
}

let input = "Escape this \\F\\ please";
#[test]
fn test_decode_handles_field_sequence() {
    // With the default separators, `\F\` must be replaced by the field
    // separator `|`.
    let decoder = EscapeSequence::new(Separators::default());

    let decoded = decoder.decode(r#"Escape this \F\ please"#);

    assert_eq!(decoded, "Escape this | please");
}

#[test]
fn ensure_decode_does_not_eat_chars_it_shouldnt() {
    // `\F` without the closing backslash is not a complete sequence, so the
    // text must come back untouched.
    let text = r#"Escape this \F please"#;
    let decoder = EscapeSequence::new(Separators::default());

    let decoded = decoder.decode(text);

    assert_eq!(decoded, text);
}

// Checks decoding when the message declares non-default delimiters.
#[test]
fn ensure_decode_handles_custom_delims() {
// presumably this header yields field `|`, component `!`, repeat `@`, escape `#`,
// subcomponent `$` - verify against `Separators::from_str`.
let delims = Separators::from_str("MSH|!@#$|").unwrap();
let escaper = EscapeSequence::new(delims);

}
// `#F#` is the field escape sequence written with the custom escape character.
let input = r#"Escape this #F# please"#;
let output = escaper.decode(input);
// NOTE(review): the expected value looks wrong. The field sequence should decode to
// the field separator (`|` for this header, if the assumption above holds), not the
// escape character `#` - confirm and fix once `decode` honours custom delimiters.
assert_eq!(output, "Escape this # please");
}
}

0 comments on commit 27efef2

Please sign in to comment.