From 10c4a0a238929f494c816f7bd621465509f59380 Mon Sep 17 00:00:00 2001 From: Tim Thompson Date: Tue, 17 Aug 2021 19:33:51 +1000 Subject: [PATCH] Closes #22: Support for HL7 escape sequence decoding (#28) * WIP: starting on an escape sequence decoder * feat #22: WIP on escape sequence handling, not as easy as I'd hoped because of HL7's full on spec. This will prob come in stages. * feat #22: Good progress on simple delimiter escape sequences * feat #22: Ensure highlighted text sequences (\N\, \H\)are ignored * perf: better than halved the perf of the 'No escape sequence' benchmark using a regex(!) rather than a simple str.find() * feat: Added support for ignoring custom (\Zdd\) escape sequences * chore: docs pass, moved decoder into better module/location * perf: Moved to the regex for all searching ops, about a 15% improvement in the benchmark * docs * docs: Added demo example for info on how to use the library * docs: Docs pass * feat: Added support for \X..\ escape sequences * docs: Updated docs for \X\ sequences. --- CHANGELOG.md | 1 + Cargo.toml | 7 + benches/decoder.rs | 74 +++++++++ examples/demo.rs | 34 +++++ readme.md | 16 +- src/escape_sequence.rs | 331 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 13 ++ src/message.rs | 7 + 8 files changed, 478 insertions(+), 5 deletions(-) create mode 100644 benches/decoder.rs create mode 100644 examples/demo.rs create mode 100644 src/escape_sequence.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 30c7949..be048f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 0.5.0 - Add `query` functions to replace the string based `Index` impls in the version version. These are functionally identical to the string `Index` implementations, but avoid some lifetime issues (returning `&&str`) and have visible documentation. + - Add `EscapeSequence` struct to support decoding [escape sequences](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) back to their original values. 
## 0.4.0 - Large change (thanks @sempervictus) to allow querying of message content by both numerical indexer and dot-notation string indexers diff --git a/Cargo.toml b/Cargo.toml index a5c96ba..92cc4e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,9 @@ name="rusthl7" path="src/lib.rs" [dependencies] +hex = "0.4" +log = "0.4" +regex = "1.5" thiserror = "1.0" [dev-dependencies] @@ -19,4 +22,8 @@ criterion = "0.3" [[bench]] name = "simple_parse" +harness = false + +[[bench]] +name = "decoder" harness = false \ No newline at end of file diff --git a/benches/decoder.rs b/benches/decoder.rs new file mode 100644 index 0000000..1a1be60 --- /dev/null +++ b/benches/decoder.rs @@ -0,0 +1,74 @@ +use criterion::{criterion_group, criterion_main, Criterion}; +use rusthl7::{escape_sequence::*, separators::Separators}; + +// Note that we're calling decode on a whole message here, although it would normally be on an individual field... +// this is just to make it work a bit harder on a larger dataset, not because it makes sense in a HL7 sense + +fn no_sequences(c: &mut Criterion) { + c.bench_function("No Escape Sequences", |b| { + let delims = Separators::default(); + let decoder = EscapeSequence::new(delims); + + b.iter(|| { + let _ = decoder.decode(get_sample_message_no_sequence()); + }) + }); +} + +// We expect creation to be a little slower, as we init the regexes to make decode() calls faster +// Amortizing this cost across multiple calls makes sense if we're caching the struct +fn create_struct(c: &mut Criterion) { + c.bench_function("Create struct", |b| { + let delims = Separators::default(); + + b.iter(|| { + let _ = EscapeSequence::new(delims); + }) + }); +} + +fn no_sequences_but_backslash(c: &mut Criterion) { + c.bench_function("No Escape Sequences But Backslash", |b| { + let delims = Separators::default(); + let decoder = EscapeSequence::new(delims); + + b.iter(|| { + let _ = decoder.decode(get_sample_message_with_backslash()); + }) + }); +} + +fn 
has_escape_sequences(c: &mut Criterion) { + c.bench_function("Has Escape Sequences", |b| { + let delims = Separators::default(); + let decoder = EscapeSequence::new(delims); + + b.iter(|| { + let _ = decoder.decode(get_sample_message_with_escape_sequences()); + }) + }); +} + +fn get_sample_message_no_sequence() -> &'static str { + // note we've stripped the backslash from the MSH + "MSH|^~*&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" +} + +fn get_sample_message_with_backslash() -> &'static str { + //there's a backslash down at char 487! + "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F" +} + +fn get_sample_message_with_escape_sequences() -> &'static str { + //there's a backslash down at char 487! 
+ "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||\\F\\555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|\\70_105|H|||F" +} + +criterion_group!( + decoder, + create_struct, + no_sequences, + no_sequences_but_backslash, + has_escape_sequences +); +criterion_main!(decoder); diff --git a/examples/demo.rs b/examples/demo.rs new file mode 100644 index 0000000..a08d0be --- /dev/null +++ b/examples/demo.rs @@ -0,0 +1,34 @@ +/*! + A short example demonstrating one way to use this library for HL7 processing. +*/ + +use std::{convert::TryFrom, error::Error}; +use rusthl7::{escape_sequence::EscapeSequence, message::Message}; + +fn main() -> Result<(), Box> { + + // Normally message would come over the wire from a remote service etc. + // Consider using the hl7-mllp-code crate or similar to make building those network services easier. 
+ let hl7_string = get_sample_message(); + + // Parse the string into a structured entity + let message = Message::try_from(hl7_string)?; + + // We can deep query message fields using the `query` functionality + let postcode = message.query("PID.F11.C5"); // Field 11, Component 5 + assert_eq!(postcode, "35292"); + + // If you have the potential for escape sequences in your data you can process those using `EscapeSequence` + let charge_to_practice = message.query("OBR.F23"); + assert_eq!(charge_to_practice, r#"Joes Obs \T\ Gynae"#); + + let decoder = EscapeSequence::new(message.get_separators()); + let charge_to_practice = decoder.decode(charge_to_practice); // Handle the escape sequences + assert_eq!(charge_to_practice, "Joes Obs & Gynae"); // converted the \T\ sequence to an ampersand + + Ok(()) +} + +fn get_sample_message() -> &'static str { + "MSH|^~\\&|GHH LAB|ELAB-3|GHH OE|BLDG4|200202150930||ORU^R01|CNTRL-3456|P|2.4\rPID|||555-44-4444||EVERYWOMAN^EVE^E^^^^L|JONES|19620320|F|||153 FERNWOOD DR.^^STATESVILLE^OH^35292||(206)3345232|(206)752-121||||AC555444444||67-A4335^OH^20030520\rOBR|1|845439^GHH OE|1045813^GHH LAB|15545^GLUCOSE|||200202150730|||||||||555-55-5555^PRIMARY^PATRICIA P^^^^MD^^|||||||Joes Obs \\T\\ Gynae||F||||||444-44-4444^HIPPOCRATES^HOWARD H^^^^MD\rOBX|1|SN|1554-5^GLUCOSE^POST 12H CFST:MCNC:PT:SER/PLAS:QN||^182|mg/dl|70_105|H|||F" +} \ No newline at end of file diff --git a/readme.md b/readme.md index 9b0e1ef..ecda2a1 100644 --- a/readme.md +++ b/readme.md @@ -5,21 +5,27 @@ Totally kind of like production ready! -The first cut was intended to parse from a multiline text blob into a tree of string slices, representing all the different facets of info. This second cut provides consistent structure down to the sub-sub-field, efficient accessors to shared string reference data, with standardized implementations of common functionality. -Interpreting these facets (type conversion, determining which fields they represent etc) is a future problem. 
+Interpreting these facets (type conversion, determining which fields they represent etc) is a future problem... there is **no plan whatsoever** for message conformance checks or anything of that nature. + +This library is trying to provide the _tooling_ you need to build robust HL7 based systems, without dictating _how_ you go about it. There's no one-size-fits-all here, so we try to provide a box of separate tools rather than a full framework. ### Intended Features and Design Notes: - [x] Initially use hl7 default separator chars - [x] Use separator chars from the message -- [X] Add support for sub-field (repeat/component/subcomponent) items +- [X] Add support for sub-field (component/subcomponent) items + - [ ] Field repeats (via `~`) are currently missing ([#26](https://github.com/wokket/rust-hl7/issues/26)) - [X] Initially, avoid any per-segment knowledge, requirement to read the spec too much etc. - Implementing all the segments, across all the hl7 versions, version-specific parsing etc is tooooo much while we're getting started. -- [ ] Add Decoding/Encoding of special chars +- [-] Add support for [HL7 escape sequences](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) ([#22](https://github.com/wokket/rust-hl7/issues/22)) + - [x] Decoding of the most common escape sequences including `\E\`, `\R\`, `\S\` & `\T\` + - [x] Correctly passes through `\H\`, `\N\` and custom `\Z..\` sequences unchanged + - [X] Decodes `\X..\` sequences for hex-encoded chars + - [ ] Support for various unicode sequences (`\C..\`, `\M..\`). These are lower priority as [HL7 Australia considers them deprecated](https://confluence.hl7australia.com/display/OO/3+Datatypes#id-3Datatypes-3.1.1.6EscapesequencessupportingmultiplecharactersetsforFT,ST,andTXdatatypes) - [ ] Add tighter MSH as an exception to the above - [ ] The above allows us to parse everything as strings, and provide helper methods for type conversions as required. 
-- [ ] Parse using a from_str() impl rather than a dedicated parser (idiomatic but no lifetimes) +- [x] Parse a message using a `TryFrom<&str>` impl rather than a dedicated parser - [x] Index into messages using HL7 string index notation and binary methods - [x] Index into sub-fields using HL7 string index notation and binary methods - [X] Index into the segment enum using HL7 string index notation and binary methods diff --git a/src/escape_sequence.rs b/src/escape_sequence.rs new file mode 100644 index 0000000..29ef0a5 --- /dev/null +++ b/src/escape_sequence.rs @@ -0,0 +1,331 @@ +use log::{debug, trace}; +use regex::Regex; + +use crate::separators::Separators; +use std::borrow::Cow; + +/// This struct provides the decoding functionality to parse escape sequences from the source string back to their original chars. +/// +/// For more info see [here](https://www.lyniate.com/knowledge-hub/hl7-escape-sequences/) or [here](https://confluence.hl7australia.com/display/OOADRM20181/Appendix+1+Parsing+HL7v2#Appendix1ParsingHL7v2-Dealingwithreservedcharactersanddelimiters) +/// +/// ## Example: +/// ``` +/// # use rusthl7::escape_sequence::EscapeSequence; +/// # use rusthl7::separators::Separators; +/// let delims = Separators::default(); +/// let decoder = EscapeSequence::new(delims); +/// let hl7_field_value = r#"Obstetrician \T\ Gynaecologist"#; +/// let decoded = decoder.decode(hl7_field_value); +/// assert_eq!(decoded, r#"Obstetrician & Gynaecologist"#); +/// ``` +/// +/// ## Details +/// +/// This decoder will replace some, **but not all** of the standard HL7 escape sequences. 
+/// - `\E\`,`\F\`, `\R\`, `\S\`, `\T\` are all handled, and replaced with the Escape, Field, Repeat, Component and Sub-Component separator chars respectively +/// - `\X..\` hexadecimal escape sequences are supported (2 hex digits per char) +/// +/// The following sequences are **NOT** replaced by design and will be left in the string: +/// - `\H\` Indicates the start of highlighted text, this is a consuming application problem and will not be replaced. +/// - `\N\` Indicates the end of highlighted text and resumption of normal text. This is a consuming application problem and will not be replaced. +/// - `\Z...\` Custom application escape sequences, these are custom (as are most `Z` items in HL7) and will not be replaced. +/// +/// Also, not all of the sequences that _should_ be replaced are currently being handled, specifically: +/// - `\Cxxyy\`, `\Mxxyyzz\` arguably _should_ be handled, but aren't currently. There's [some suggestion](https://confluence.hl7australia.com/display/OOADRM20181/Appendix+1+Parsing+HL7v2#Appendix1ParsingHL7v2-Unicodecharacters) that these are discouraged in lieu of html-escaped values +/// +/// If there's _no possibility_ of escape sequences (because there are no escape characters, typically backslashes) in the value, this function short circuits as early as possible and returns the original string slice for optimum performance. +pub struct EscapeSequence { + escape_buf: [u8; 1], + field_buf: [u8; 1], + repeat_buf: [u8; 1], + component_buf: [u8; 1], + subcomponent_buf: [u8; 1], + escape_regex: Regex, +} + +impl<'a> EscapeSequence { + /// Create a new struct ready for processing of escape sequences. + /// Escape sequences in HL7 are dependent on the actual delimiters used _for that message_, and so we need a [Separators] instance to know what chars we're working with. + /// + /// Creating a new [EscapeSequence] does involve some non-trivial work in order to improve the performance of the `decode()` operations.
It's expected that instances of this struct will be cached + /// per message, or per sending application if it will always use the same separators, or for the lifetime of the process if you're only dealing with known (often default) separators. + pub fn new(delims: Separators) -> EscapeSequence { + let regex = if delims.escape_char == '\\' { + Regex::new(r#"\\"#) // needs special handling because backslashes have meaning in regexes, and need to be escaped + } else { + Regex::new(String::from(delims.escape_char).as_str()) //everything else just works (I hope!) + } + .unwrap(); + + let mut return_val = EscapeSequence { + escape_buf: [0; 1], // The spec specifically requires single byte (actually 7-bit ASCII) delim chars + field_buf: [0; 1], + repeat_buf: [0; 1], + component_buf: [0; 1], + subcomponent_buf: [0; 1], + escape_regex: regex, + }; + + // We need &str to inject into the output buffer, convert the `char`s here + let _bytes = delims.escape_char.encode_utf8(&mut return_val.escape_buf); + let _bytes = delims.field.encode_utf8(&mut return_val.field_buf); + let _bytes = delims.repeat.encode_utf8(&mut return_val.repeat_buf); + let _bytes = delims.component.encode_utf8(&mut return_val.component_buf); + let _bytes = delims + .subcomponent + .encode_utf8(&mut return_val.subcomponent_buf); + + return_val + } + + /// This is where the magic happens. Call this to update any escape sequences in the given &str. + pub fn decode(&self, input: S) -> Cow<'a, str> + where + S: Into>, + { + // The comments below will almost certainly reference backslashes as that is by far the most common escape character + // the reality is any reference to "backslash" is actually referencing the escape char in the MSH segment, and stored in `self.escape_buf` + + let input = input.into(); + let first = self.escape_regex.find(&input); //using `regex.find` here is about twice as fast for the 'no sequences' benchmark as using &str.find()...
+ + match first { + Some(first) => { + let first = first.start(); + + // We know there's a backslash, so we need to process stuff + + // we're going to be replacing (mainly) 3 char escape sequences (eg `\F\`) with a single char sequence (eg `|`) so the initial length of the input should be sufficient + let mut output: Vec = Vec::with_capacity(input.len()); + output.extend_from_slice(input[0..first].as_bytes()); // this doesn't include the escape char we found + + // index in input that we're up to + let mut i = first; + + debug!("Found first escape char at {}", first); + + while i < input.len() { + let start_of_sequence = self.escape_regex.find(&input[i..]); + if start_of_sequence.is_none() { + // there's nothing left to process, no more backslashes in the rest of the buffer + + trace!("No more sequence starts in input, completing..."); + output.extend_from_slice(input[i..].as_bytes()); // add the rest of the input + break; // break out of while loop + } + + let start_index = start_of_sequence.unwrap().start() + i; // index is offset into input by i chars as that's what we subsliced above + trace!("Found the next escape char at {}", start_index); + + let end_of_sequence = self.escape_regex.find(&input[start_index + 1..]); + + if end_of_sequence.is_none() { + // there's nothing left to process, the backslash we are currently looking at is NOT an escape sequence + trace!("No more sequence ends in input, completing..."); + output.extend_from_slice(input[start_index..].as_bytes()); // add the rest of the input (including the escape char that brought us here) in one go + break; // break out of while loop + } + + // else we have found another escape char, get the slice in between + let end_index = end_of_sequence.unwrap().start() + start_index + 1; // the end is the number of chars after the start_index, not from the start of input + trace!("Found end of sequence at {}", end_index); + + let sequence = &input[start_index + 1..end_index]; + trace!("Found escape sequence: 
'{}'", sequence); + + // we have a possible window of data between i and start_index that we've just read through as text, but isn't yet in output... append it now + output.extend_from_slice(input[i..start_index].as_bytes()); + + match sequence { + "E" => output.extend_from_slice(&self.escape_buf), + "F" => output.extend_from_slice(&self.field_buf), + "R" => output.extend_from_slice(&self.repeat_buf), + "S" => output.extend_from_slice(&self.component_buf), + "T" => output.extend_from_slice(&self.subcomponent_buf), + + // Highlighted/Normal text sequences need to remain for consuming libraries to act on as they see fit + "H" | "N" => { + output.extend_from_slice(&self.escape_buf); + output.extend_from_slice(sequence.as_bytes()); + output.extend_from_slice(&self.escape_buf); + } + + _ => { + if sequence.starts_with('Z') { + trace!("Into custom escape sequence, ignoring..."); + output.extend_from_slice(&self.escape_buf); + output.extend_from_slice(sequence.as_bytes()); + output.extend_from_slice(&self.escape_buf); + + } else if sequence.starts_with('X') { + let hex_code = &sequence[1..]; + let hex = hex::decode(hex_code).expect("Unable to parse X-value into valid hex"); + println!("Converted hex code {} to {:?}", hex_code, hex); + output.extend_from_slice(&hex); + + // TODO: Add more sequences + } else { + // not a known sequence, must just be two backslashes randomly in a string + trace!("Unknown sequence, extending output..."); + output.extend_from_slice( + input[start_index - 1..end_index].as_bytes(), + ); + // include both the initial escape char, and also the final one. 
+ } + } + } + + i = end_index + 1; // move through buffer, we've covered everything up to this point now + } // while more chars in input to loop through + + Cow::Owned(String::from_utf8(output).unwrap()) + } + None => { + // no escape char in the string at all, just return what we have + input + } + } + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use super::*; + + #[test] + fn test_decode_does_nothing_if_not_required() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = "There are no escape sequences here/there/."; + let output = escaper.decode(input); + assert_eq!(output, input); + } + + #[test] + fn test_decode_handles_simple_x_codes() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = "Escape sequence with \\X0D\\."; + let output = escaper.decode(input); + assert_eq!(output, "Escape sequence with \r."); + } + + #[test] + fn test_decode_handles_multi_byte_x_codes() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = "Sentence 1.\\X0D0A\\Sentence 2."; + let output = escaper.decode(input); + assert_eq!(output, "Sentence 1.\r\nSentence 2."); + } + + #[test] + fn test_decode_does_nothing_if_backslash_is_not_escape_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"There are no escape sequences here\there."#; + let output = escaper.decode(input); + assert_eq!(output, input); + } + + #[test] + fn test_decode_handles_field_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this \F\ please"#; + let output = escaper.decode(input); + assert_eq!(output, "Escape this | please"); + } + + #[test] + fn ensure_decode_does_not_eat_chars_it_shouldnt() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this \F please"#; + 
let output = escaper.decode(input); + assert_eq!(output, input); + } + + #[test] + fn ensure_decode_handles_custom_delims() { + let delims = Separators::from_str("MSH^!@#$").unwrap(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this #F# please"#; + let output = escaper.decode(input); + assert_eq!(output, "Escape this ^ please"); + } + + #[test] + fn ensure_decode_handles_eescape_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this \E\ please"#; // convert the escape sequence + let output = escaper.decode(input); + assert_eq!(output, r#"Escape this \ please"#); // into a single escape char + + // ensure it moves on past the char it just added + let input = r#"Escape this \E\ pretty \F\ please"#; // convert the escape sequence + let output = escaper.decode(input); + assert_eq!(output, r#"Escape this \ pretty | please"#); // into a single escape char and still handle future sequences ok + } + + #[test] + fn test_decode_handles_repeat_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this \R\ please"#; + let output = escaper.decode(input); + assert_eq!(output, "Escape this ~ please"); + } + + #[test] + fn test_decode_handles_component_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Escape this \S\ please"#; + let output = escaper.decode(input); + assert_eq!(output, "Escape this ^ please"); + } + + #[test] + fn test_decode_handles_subcomponent_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Obstetrician \T\ Gynaecologist"#; + let output = escaper.decode(input); + assert_eq!(output, "Obstetrician & Gynaecologist"); + } + + #[test] + fn ensure_decode_ignores_highlighting_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + 
let input = r#"Don't escape this \H\highlighted text\N\ please"#; + let output = escaper.decode(input); + assert_eq!(output, input); + } + + #[test] + fn ensure_decode_ignores_custom_sequence() { + let delims = Separators::default(); + let escaper = EscapeSequence::new(delims); + + let input = r#"Don't escape this custom sequence \Z1234\ please"#; + let output = escaper.decode(input); + assert_eq!(output, input); + } +} diff --git a/src/lib.rs b/src/lib.rs index 5ea367b..bfb7dbd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,16 @@ +/*! + +# RustHl7 - A HL7 V2 message parser and library + +This crate is attempting to provide the tooling for a fully spec-compliant HL7 V2 message parser. Note that _interpreting_ the parsed message elements into a strongly +typed segment/message format is specifically **out of scope** as there are simply too many variants over too many versions for me to go there (maybe +someone else could code-gen a crate using this crate to provide the source information?). + +This crate tries to provide the tools to build HL7 systems without dictating _how_ to build your system; there's no such thing as one-size-fits-all in healthcare! + +*/ + +pub mod escape_sequence; pub mod fields; pub mod message; pub mod segments; diff --git a/src/message.rs b/src/message.rs index 96feeb8..feac1c3 100644 --- a/src/message.rs +++ b/src/message.rs @@ -12,6 +12,7 @@ use std::ops::Index; pub struct Message<'a> { pub source: &'a str, pub segments: Vec>, + separators: Separators } impl<'a> Message<'a> { @@ -92,6 +93,11 @@ impl<'a> Message<'a> { self.source } + /// Gets the delimiter information for this Message + pub fn get_separators(&self) -> Separators { + self.separators + } + /// Access Segment, Field, or sub-field string references by string index pub fn query<'b, S>(&self, idx: S) -> &'a str where @@ -150,6 +156,7 @@ impl<'a> TryFrom<&'a str> for Message<'a> { let msg = Message { source, segments: segments?, + separators: delimiters }; Ok(msg)