From 295337d964c9aa7a0402ff5a0474b02a4b45b70b Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Fri, 15 May 2020 14:16:51 -0400
Subject: [PATCH 01/24] Added whitespace token and made -E keep spacing, TODO
 consume whitespace

---
 src/data/lex.rs |  4 ++++
 src/lex/cpp.rs  |  2 +-
 src/lex/mod.rs  | 20 +++++++++++++++++---
 src/main.rs     |  4 ++--
 4 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/src/data/lex.rs b/src/data/lex.rs
index 813fc03d..b98498ab 100644
--- a/src/data/lex.rs
+++ b/src/data/lex.rs
@@ -190,6 +190,8 @@ pub enum Token {
     Literal(Literal),
     Id(InternedStr),
 
+    Whitespace(String),
+
     // Misc
     Ellipsis,
     StructDeref, // ->
@@ -353,6 +355,8 @@ impl std::fmt::Display for Token {
             Id(id) => write!(f, "{}", id),
             Keyword(k) => write!(f, "{}", k),
 
+            Whitespace(s) => write!(f, "{}", s),
+
             Ellipsis => write!(f, "..."),
             StructDeref => write!(f, "->"),
             Hash => write!(f, "#"),
diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 703a0a06..06cb25aa 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -320,7 +320,7 @@ impl<'a> PreProcessor<'a> {
         self.lexer().span(start)
     }
     #[inline]
-    fn consume_whitespace(&mut self) {
+    fn consume_whitespace(&mut self) -> String {
         self.lexer_mut().consume_whitespace()
     }
     #[inline]
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index 7e1b9aa8..a8ef9460 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -187,12 +187,15 @@ impl Lexer {
     ///
     /// Before: b"    // some comment\n /*multi comment*/hello   "
     /// After:  b"hello   "
-    fn consume_whitespace(&mut self) {
+    fn consume_whitespace(&mut self) -> String {
         // there may be comments following whitespace
+        let mut whitespace = String::new();
         loop {
             // whitespace
             while self.peek().map_or(false, |c| c.is_ascii_whitespace()) {
-                self.next_char();
+                if let Some(c) = self.next_char() {
+                    whitespace.push(c.into());
+                }
             }
             // comments
             if self.peek() == Some(b'/') {
@@ -211,6 +214,7 @@ impl Lexer {
                 break;
             }
         }
+        whitespace
     }
     /// Remove all characters between now and the next b'\n' character.
     ///
@@ -664,7 +668,17 @@ impl Iterator for Lexer {
             return None;
         }
 
-        self.consume_whitespace();
+        {
+            let span_start = self.location.offset;
+            let data = self.consume_whitespace();
+            if !data.is_empty() {
+                return Some(Ok(Locatable {
+                    data: Token::Whitespace(data),
+                    location: self.span(span_start),
+                }));
+            }
+        }
+
         let c = self.next_char().and_then(|c| {
             let span_start = self.location.offset - 1;
             // this giant switch is most of the logic
diff --git a/src/main.rs b/src/main.rs
index 00b79891..6ade8767 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -46,7 +46,7 @@ FLAGS:
                             Note that preprocessing discards whitespace and comments.
                             There is not currently a way to disable this behavior.
     -V, --version          Prints version information
-    
+
 OPTIONS:
         --color <when>       When to use color. May be \"never\", \"auto\", or \"always\". [default: auto]
     -o, --output <output>    The output file to use. [default: a.out]
@@ -134,7 +134,7 @@ fn real_main(buf: Rc<str>, bin_opt: BinOpt, output: &Path) -> Result<(), (Error,
         let stdout = io::stdout();
         let mut stdout_buf = BufWriter::new(stdout.lock());
         for token in rcc_try!(tokens, files) {
-            write!(stdout_buf, "{} ", token.data).expect("failed to write to stdout");
+            write!(stdout_buf, "{}", token.data).expect("failed to write to stdout");
         }
         writeln!(stdout_buf).expect("failed to write to stdout");
 

From 1514108236ec17507ecf7ebb646087f49e41483e Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Fri, 15 May 2020 15:57:45 -0400
Subject: [PATCH 02/24] Filtered whitespace before parser, TODO debug a lot of
 test cases

---
 src/lex/cpp.rs   | 14 +++++++++++++-
 src/lex/mod.rs   | 10 ++++++++++
 src/lib.rs       |  2 +-
 src/parse/mod.rs |  1 +
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 06cb25aa..f9b722e6 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1285,6 +1285,16 @@ impl<'a> PreProcessor<'a> {
             }
         }
     }
+
+    /// Returns next token in stream which is not whitespace
+    pub fn next_non_whitespace(&mut self) -> Option<CppResult<Token>> {
+        loop {
+            match self.next() {
+                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
+                other => break other,
+            }
+        }
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -1407,7 +1417,7 @@ mod tests {
 
     macro_rules! assert_err {
         ($src: expr, $err: pat, $description: expr $(,)?) => {
-            match cpp($src).next().unwrap().unwrap_err().data {
+            match cpp($src).next_non_whitespace().unwrap().unwrap_err().data {
                 Error::PreProcessor($err) => {}
                 Error::PreProcessor(other) => panic!("expected {}, got {}", $description, other),
                 _ => panic!("expected cpp err"),
@@ -1427,9 +1437,11 @@ mod tests {
         assert_eq!(
             cpp(src)
                 .map(|res| res.map(|token| token.data))
+                .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
                 .collect::<Vec<_>>(),
             cpp(cpp_src)
                 .map(|res| res.map(|token| token.data))
+                .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
                 .collect::<Vec<_>>(),
             "{} is not the same as {}",
             src,
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index a8ef9460..e4b8a528 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -648,6 +648,16 @@ impl Lexer {
         }
         Ok(Token::Id(InternedStr::get_or_intern(id)))
     }
+
+    /// Returns next token in stream which is not whitespace
+    pub fn next_non_whitespace(&mut self) -> Option<LexResult<Locatable<Token>>> {
+        loop {
+            match self.next() {
+                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
+                other => break other,
+            }
+        }
+    }
 }
 
 impl Iterator for Lexer {
diff --git a/src/lib.rs b/src/lib.rs
index 884a242b..dd4405da 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -240,7 +240,7 @@ pub fn check_semantics(buf: &str, opt: Opt) -> Program<Vec<Locatable<hir::Declar
         }};
     }
     let first = loop {
-        match cpp.next() {
+        match cpp.next_non_whitespace() {
             Some(Ok(token)) => break Some(token),
             Some(Err(err)) => handle_err!(err),
             None => break None,
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index b74fac2d..4dc4e7ca 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -141,6 +141,7 @@ impl<I: Lexer> Parser<I> {
     fn __impl_next_token(&mut self) -> Option<Locatable<Token>> {
         loop {
             match self.tokens.next() {
+                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
                 Some(Ok(mut token)) => {
                     self.last_location = token.location;
                     // This is _such_ a hack

From 33f9f1242b7da6daee6440f46010ebf35bab9eaa Mon Sep 17 00:00:00 2001
From: Joshua Nelson <jyn514@gmail.com>
Date: Fri, 15 May 2020 19:11:03 -0400
Subject: [PATCH 03/24] Fix some bugs in whitespace handling

- Don't set `seen_line_token` for whitespace
- Ignore whitespace in `#if` expressions (since the parser doesn't know
what whitespace is)
- Run `cargo fmt`
---
 src/lex/cpp.rs   | 9 ++++++++-
 src/lex/mod.rs   | 8 ++++++--
 src/parse/mod.rs | 5 ++++-
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index f9b722e6..95d4f1fd 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -927,6 +927,10 @@ impl<'a> PreProcessor<'a> {
                         Ok(token)
                     }
                 },
+                Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    ..
+                }) => continue,
                 _ => token,
             };
             cpp_tokens.push(token);
@@ -1290,7 +1294,10 @@ impl<'a> PreProcessor<'a> {
     pub fn next_non_whitespace(&mut self) -> Option<CppResult<Token>> {
         loop {
             match self.next() {
-                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
+                Some(Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    location: _,
+                })) => continue,
                 other => break other,
             }
         }
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index e4b8a528..5379ee8e 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -653,7 +653,10 @@ impl Lexer {
     pub fn next_non_whitespace(&mut self) -> Option<LexResult<Locatable<Token>>> {
         loop {
             match self.next() {
-                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
+                Some(Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    location: _,
+                })) => continue,
                 other => break other,
             }
         }
@@ -885,7 +888,8 @@ impl Iterator for Lexer {
                         .with(LexError::UnknownToken(x as char))));
                 }
             };
-            self.seen_line_token |= data != Token::Hash;
+            // We've seen a token if this isn't # and this isn't whitespace
+            self.seen_line_token |= data != Token::Hash && !matches!(data, Token::Whitespace(_));
             Some(Ok(Locatable {
                 data,
                 location: self.span(span_start),
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 4dc4e7ca..54b64592 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -141,7 +141,10 @@ impl<I: Lexer> Parser<I> {
     fn __impl_next_token(&mut self) -> Option<Locatable<Token>> {
         loop {
             match self.tokens.next() {
-                Some(Ok(Locatable {data: Token::Whitespace(_), location: _})) => continue,
+                Some(Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    location: _,
+                })) => continue,
                 Some(Ok(mut token)) => {
                     self.last_location = token.location;
                     // This is _such_ a hack

From 30be8d9df3ef62d040a68c541090331ca522f847 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 00:46:22 -0400
Subject: [PATCH 04/24] Add a oneline whitespace consumption after #ifdef,
 #ifndef, #undef and update tests to reflect non-whitespace

---
 src/lex/cpp.rs | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 38d6e48e..e2139431 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -480,10 +480,12 @@ impl<'a> PreProcessor<'a> {
                 self.if_directive(condition, start)
             }
             IfNDef => {
+                self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
                 let name = self.expect_id()?;
                 self.if_directive(!self.definitions.contains_key(&name.data), start)
             }
             IfDef => {
+                self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
                 let name = self.expect_id()?;
                 self.if_directive(self.definitions.contains_key(&name.data), start)
             }
@@ -530,6 +532,7 @@ impl<'a> PreProcessor<'a> {
             }
             Define => self.define(start),
             Undef => {
+                self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
                 let name = self.expect_id()?;
                 self.definitions.remove(&name.data);
                 Ok(())
@@ -886,11 +889,7 @@ impl<'a> PreProcessor<'a> {
                 .collect::<Result<Vec<_>, Locatable<Error>>>()
         };
 
-        let line = self.line();
-        self.file_processor.consume_whitespace();
-        if self.line() != line {
-            return Err(self.span(start).error(CppError::EmptyDefine));
-        }
+        self.consume_whitespace_oneline(start, CppError::EmptyDefine)?;
         let id = self.expect_id()?;
         // NOTE: does _not_ discard whitespace
         if self.lexer_mut().match_next(b'(') {
@@ -1090,6 +1089,20 @@ impl<'a> PreProcessor<'a> {
             }
         }
     }
+
+    /// Consumes whitespace but returns error if it includes a newline
+    fn consume_whitespace_oneline(
+        &mut self,
+        start: u32,
+        error: CppError,
+    ) -> Result<String, CompileError> {
+        let line = self.line();
+        let ret = self.lexer_mut().consume_whitespace();
+        if self.line() != line {
+            return Err(self.span(start).error(error));
+        }
+        Ok(ret)
+    }
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -1285,12 +1298,12 @@ mod tests {
         let code = "#ifdef a
         whatever, doesn't matter
         #endif";
-        assert_eq!(cpp(code).next(), None);
+        assert_eq!(cpp(code).next_non_whitespace(), None);
 
         let code = "#ifdef a\n#endif";
-        assert_eq!(cpp(code).next(), None);
+        assert_eq!(cpp(code).next_non_whitespace(), None);
 
-        assert!(cpp("#ifdef").next().unwrap().is_err());
+        assert!(cpp("#ifdef").next_non_whitespace().unwrap().is_err());
 
         let nested = "#ifdef a
         #ifdef b
@@ -1299,14 +1312,14 @@ mod tests {
         #endif
         char;";
         assert_eq!(
-            cpp(nested).next().unwrap().unwrap().data,
+            cpp(nested).next_non_whitespace().unwrap().unwrap().data,
             Token::Keyword(Keyword::Char)
         );
 
-        assert!(cpp("#endif").next().unwrap().is_err());
+        assert!(cpp("#endif").next_non_whitespace().unwrap().is_err());
 
         let same_line = "#ifdef a #endif\nint main() {}";
-        assert!(cpp(same_line).next().unwrap().is_err());
+        assert!(cpp(same_line).next_non_whitespace().unwrap().is_err());
     }
     #[test]
     fn ifndef() {
@@ -1315,7 +1328,7 @@ mod tests {
 #define A
 #endif
 A";
-        assert!(cpp(src).next().is_none());
+        assert!(cpp(src).next_non_whitespace().is_none());
     }
     #[test]
     fn object_macros() {

From 60db031d6e8d21f474f10fdc66817ed511e2d308 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 01:04:46 -0400
Subject: [PATCH 05/24] Consume whitespace between function macro args

---
 src/lex/cpp.rs   |  5 +++--
 src/lex/files.rs | 13 +++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index e2139431..47d5fa83 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -821,7 +821,7 @@ impl<'a> PreProcessor<'a> {
     fn fn_args(&mut self, start: u32) -> Result<Vec<InternedStr>, Locatable<Error>> {
         let mut arguments = Vec::new();
         loop {
-            match self.file_processor.next() {
+            match self.file_processor.next_non_whitespace() {
                 None => {
                     return Err(CompileError::new(
                         CppError::EndOfFile("identifier or ')'").into(),
@@ -1091,13 +1091,14 @@ impl<'a> PreProcessor<'a> {
     }
 
     /// Consumes whitespace but returns error if it includes a newline
+    #[inline]
     fn consume_whitespace_oneline(
         &mut self,
         start: u32,
         error: CppError,
     ) -> Result<String, CompileError> {
         let line = self.line();
-        let ret = self.lexer_mut().consume_whitespace();
+        let ret = self.file_processor.consume_whitespace();
         if self.line() != line {
             return Err(self.span(start).error(error));
         }
diff --git a/src/lex/files.rs b/src/lex/files.rs
index 03a0133c..c79edd8f 100644
--- a/src/lex/files.rs
+++ b/src/lex/files.rs
@@ -159,4 +159,17 @@ impl FileProcessor {
         }
         tokens
     }
+
+    /// Returns next token in stream which is not whitespace
+    pub(super) fn next_non_whitespace(&mut self) -> Option<CompileResult<Locatable<Token>>> {
+        loop {
+            match self.next() {
+                Some(Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    location: _,
+                })) => continue,
+                other => break other,
+            }
+        }
+    }
 }

From 30eac7f6f355cc2e2e01dbb62b60f7dc8a2307eb Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 02:22:27 -0400
Subject: [PATCH 06/24] Fixed most of lex::tests::*

---
 src/lex/cpp.rs   | 29 ++++++++++++++++-------------
 src/lex/tests.rs | 12 +++++++++++-
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 47d5fa83..c475a0c2 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1242,16 +1242,20 @@ mod tests {
             _ => panic!("not a keyword: {:?}", token),
         }
     }
+    fn is_same_preprocessed(xs: PreProcessor, ys: PreProcessor) -> bool {
+        let xs = xs
+            .map(|res| res.map(|token| token.data))
+            .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
+            .collect::<Vec<_>>();
+        let ys = ys
+            .map(|res| res.map(|token| token.data))
+            .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
+            .collect::<Vec<_>>();
+        xs == ys
+    }
     fn assert_same(src: &str, cpp_src: &str) {
-        assert_eq!(
-            cpp(src)
-                .map(|res| res.map(|token| token.data))
-                .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
-                .collect::<Vec<_>>(),
-            cpp(cpp_src)
-                .map(|res| res.map(|token| token.data))
-                .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
-                .collect::<Vec<_>>(),
+        assert!(
+            is_same_preprocessed(cpp(src), cpp(cpp_src)),
             "{} is not the same as {}",
             src,
             cpp_src,
@@ -1508,15 +1512,14 @@ c
     }
     #[test]
     fn test_comment_newline() {
-        let tokens: Vec<_> = cpp_no_newline(
+        let tokens = cpp_no_newline(
             "
 #if 1 //
 int main() {}
 #endif
 ",
-        )
-        .collect();
-        assert_eq!(tokens, cpp("int main() {}").collect::<Vec<_>>());
+        );
+        assert!(is_same_preprocessed(tokens, cpp("int main() {}")));
         assert_same(
             "
 #if 1 /**//**/
diff --git a/src/lex/tests.rs b/src/lex/tests.rs
index b6839480..1feb9199 100644
--- a/src/lex/tests.rs
+++ b/src/lex/tests.rs
@@ -15,7 +15,17 @@ fn lex(input: &str) -> Option<LexType> {
     lexed.pop()
 }
 fn lex_all(input: &str) -> Vec<LexType> {
-    cpp(input).collect()
+    cpp(input)
+        .filter(|res| {
+            !matches!(
+                res,
+                Ok(Locatable {
+                    data: Token::Whitespace(_),
+                    ..
+                })
+            )
+        })
+        .collect()
 }
 
 fn match_data<T>(lexed: Option<LexType>, closure: T) -> bool

From f0c146ef057085442919c5e5fd40ee8b5cb11df4 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 02:33:10 -0400
Subject: [PATCH 07/24] Handle lex::tests::test_no_newline

---
 src/lex/tests.rs | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/lex/tests.rs b/src/lex/tests.rs
index 1feb9199..14f984ec 100644
--- a/src/lex/tests.rs
+++ b/src/lex/tests.rs
@@ -16,18 +16,20 @@ fn lex(input: &str) -> Option<LexType> {
 }
 fn lex_all(input: &str) -> Vec<LexType> {
     cpp(input)
-        .filter(|res| {
-            !matches!(
-                res,
-                Ok(Locatable {
-                    data: Token::Whitespace(_),
-                    ..
-                })
-            )
-        })
+        .filter(is_not_whitespace)
         .collect()
 }
 
+pub(crate) fn is_not_whitespace(res: &LexType) -> bool {
+    !matches!(
+        res,
+        Ok(Locatable {
+            data: Token::Whitespace(_),
+            ..
+        })
+    )
+}
+
 fn match_data<T>(lexed: Option<LexType>, closure: T) -> bool
 where
     T: FnOnce(Result<&Token, &str>) -> bool,
@@ -305,7 +307,7 @@ fn test_strings() {
 #[test]
 fn test_no_newline() {
     assert!(cpp_no_newline("").next().is_none());
-    let mut tokens: Vec<_> = cpp_no_newline(" ").collect();
+    let mut tokens: Vec<_> = cpp_no_newline(" ").filter(is_not_whitespace).collect();
     assert_eq!(tokens.len(), 1);
     assert!(tokens.remove(0).unwrap_err().is_lex_err());
 

From 8e0509e45ef518c0c3b2583b92de0cc99aa0e096 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 02:50:07 -0400
Subject: [PATCH 08/24] Changed analyze::test::lol to not have leading newline

TODO should parse_all remove leading newlines?
---
 src/analyze/mod.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/analyze/mod.rs b/src/analyze/mod.rs
index ec7d7f64..b0d8f159 100644
--- a/src/analyze/mod.rs
+++ b/src/analyze/mod.rs
@@ -1925,8 +1925,7 @@ pub(crate) mod test {
     }
     #[test]
     fn lol() {
-        let lol = "
-int *jynelson(int(*fp)(int)) {
+        let lol = "int *jynelson(int(*fp)(int)) {
     return 0;
 }
 int f(int i) {

From 01f040b6b0054bd0d8a6094efb770a079fa3b794 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 02:56:04 -0400
Subject: [PATCH 09/24] Added whitespace between hash and directive

---
 src/lex/cpp.rs   | 2 +-
 src/lex/tests.rs | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index c475a0c2..6b88ee12 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -427,7 +427,7 @@ impl<'a> PreProcessor<'a> {
         };
         Some(if is_hash && !self.file_processor.seen_line_token() {
             let line = self.file_processor.line();
-            match self.file_processor.next()? {
+            match self.file_processor.next_non_whitespace()? {
                 Ok(Locatable {
                     data: Token::Id(id),
                     location,
diff --git a/src/lex/tests.rs b/src/lex/tests.rs
index 14f984ec..5e067032 100644
--- a/src/lex/tests.rs
+++ b/src/lex/tests.rs
@@ -15,9 +15,7 @@ fn lex(input: &str) -> Option<LexType> {
     lexed.pop()
 }
 fn lex_all(input: &str) -> Vec<LexType> {
-    cpp(input)
-        .filter(is_not_whitespace)
-        .collect()
+    cpp(input).filter(is_not_whitespace).collect()
 }
 
 pub(crate) fn is_not_whitespace(res: &LexType) -> bool {

From d51af7e4b50b918d98bdc3f637f23b5249ac7f32 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 03:19:54 -0400
Subject: [PATCH 10/24] Remove trailing newline for -E

---
 src/main.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/main.rs b/src/main.rs
index 6ade8767..8bb6815a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -136,7 +136,6 @@ fn real_main(buf: Rc<str>, bin_opt: BinOpt, output: &Path) -> Result<(), (Error,
         for token in rcc_try!(tokens, files) {
             write!(stdout_buf, "{}", token.data).expect("failed to write to stdout");
         }
-        writeln!(stdout_buf).expect("failed to write to stdout");
 
         return Ok(());
     } else {

From f671f65e898b2a537f56d2396dd347efe21a48c0 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 20:01:40 -0400
Subject: [PATCH 11/24] Handle spaces in defines and whitespace in comments

---
 src/lex/cpp.rs |  9 +++++++++
 src/lex/mod.rs | 23 ++++++++++++++++++-----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 6b88ee12..0410da29 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -886,6 +886,15 @@ impl<'a> PreProcessor<'a> {
             this.tokens_until_newline()
                 .into_iter()
                 .map(|res| res.map(|loc| loc.data))
+                .enumerate()
+                .map(|(i, x)| {
+                    if i == 0 {
+                        vec![x]
+                    } else {
+                        vec![x, Ok(Token::Whitespace(String::from(" ")))]
+                    }
+                })
+                .flatten()
                 .collect::<Result<Vec<_>, Locatable<Error>>>()
         };
 
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index 3b73f0af..bf1554e7 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -204,12 +204,16 @@ impl Lexer {
             // comments
             if self.peek() == Some(b'/') {
                 match self.peek_next() {
-                    Some(b'/') => self.consume_line_comment(),
+                    Some(b'/') => {
+                        self.consume_line_comment();
+                        whitespace.push('\n');
+                    }
                     Some(b'*') => {
                         self.next_char();
                         self.next_char();
-                        if let Err(err) = self.consume_multi_comment() {
-                            self.error_handler.push_back(err);
+                        match self.consume_multi_comment() {
+                            Ok(ws) => whitespace.push_str(&ws),
+                            Err(err) => self.error_handler.push_back(err),
                         }
                     }
                     _ => break,
@@ -236,12 +240,21 @@ impl Lexer {
     ///
     /// Before: u8s{"hello this is a lot of text */ int main(){}"}
     /// After:  chars{" int main(){}"}
-    fn consume_multi_comment(&mut self) -> LexResult<()> {
+    ///
+    /// Return newlines occupied by the comment or a space if no newlines
+    fn consume_multi_comment(&mut self) -> LexResult<String> {
+        let mut whitespace = String::new();
         let start = self.location.offset - 2;
         while let Some(c) = self.next_char() {
             if c == b'*' && self.peek() == Some(b'/') {
                 self.next_char();
-                return Ok(());
+                if whitespace.is_empty() {
+                    whitespace.push(' '); // For the case `a/* */b`
+                }
+                return Ok(whitespace);
+            }
+            if c == b'\n' {
+                whitespace.push(c.into());
             }
         }
         Err(Locatable {

From 36b7fc3d9a59fda6e029478198cda95d4bc4c40e Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sat, 23 May 2020 21:10:58 -0400
Subject: [PATCH 12/24] Handle lex::tests::test_no_newline (again)

---
 src/lex/tests.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lex/tests.rs b/src/lex/tests.rs
index 5e067032..2480b7a1 100644
--- a/src/lex/tests.rs
+++ b/src/lex/tests.rs
@@ -310,7 +310,7 @@ fn test_no_newline() {
     assert!(tokens.remove(0).unwrap_err().is_lex_err());
 
     // regression test for https://github.com/jyn514/rcc/issues/323
-    let tokens: Vec<_> = cpp_no_newline("//").collect();
+    let tokens: Vec<_> = cpp_no_newline("//").filter(is_not_whitespace).collect();
     assert_eq!(tokens.len(), 1);
     assert!(tokens[0].as_ref().unwrap_err().is_lex_err());
 }

From 675989b8e52c043b707640d40f62d90843c16885 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 00:03:10 -0400
Subject: [PATCH 13/24] Fix error messages for macros and #ifdef

---
 src/data/error.rs | 6 ++++++
 src/lex/cpp.rs    | 9 ++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/data/error.rs b/src/data/error.rs
index 9bf7d64e..a5a33867 100644
--- a/src/data/error.rs
+++ b/src/data/error.rs
@@ -455,6 +455,12 @@ pub enum CppError {
     #[error("expected expression for #if")]
     EmptyExpression,
 
+    #[error("macro name missing")]
+    ExpectedMacroId,
+
+    #[error("missing {0} in {1}")]
+    Expected(&'static str, &'static str),
+
     /// A `#define` occured without an identifier following.
     #[error("macro name missing")]
     EmptyDefine,
diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 0410da29..5b6aeb84 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -480,12 +480,12 @@ impl<'a> PreProcessor<'a> {
                 self.if_directive(condition, start)
             }
             IfNDef => {
-                self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
+                self.consume_whitespace_oneline(start, CppError::ExpectedMacroId)?;
                 let name = self.expect_id()?;
                 self.if_directive(!self.definitions.contains_key(&name.data), start)
             }
             IfDef => {
-                self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
+                self.consume_whitespace_oneline(start, CppError::ExpectedMacroId)?;
                 let name = self.expect_id()?;
                 self.if_directive(self.definitions.contains_key(&name.data), start)
             }
@@ -907,7 +907,10 @@ impl<'a> PreProcessor<'a> {
             // # define identifier lparen identifier-listopt ) replacement-list new-line
             // # define identifier lparen ... ) replacement-list new-line
             // # define identifier lparen identifier-list , ... ) replacement-list new-line
-            self.lexer_mut().consume_whitespace();
+            self.consume_whitespace_oneline(
+                self.file_processor.offset(),
+                CppError::Expected(")", "macro parameter list"),
+            )?;
             let params = if !self.lexer_mut().match_next(b')') {
                 self.fn_args(start)?
             } else {

From 1d5578fc6ab61cc2c99ed6bb9391af170a5779ca Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 00:07:28 -0400
Subject: [PATCH 14/24] Rework whitespace in tokens_until_newline

---
 src/lex/cpp.rs   | 46 ++++++++++++++++++++++------------------------
 src/lex/files.rs | 17 +++++++++++++++--
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 5b6aeb84..77bb8442 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -410,8 +410,15 @@ impl<'a> PreProcessor<'a> {
         self.file_processor.line()
     }
 
-    fn tokens_until_newline(&mut self) -> Vec<CompileResult<Locatable<Token>>> {
-        self.file_processor.tokens_until_newline()
+    fn tokens_until_newline(&mut self, whitespace: bool) -> Vec<CompileResult<Locatable<Token>>> {
+        self.file_processor.tokens_until_newline(whitespace)
+    }
+
+    fn is_not_whitespace(res: &CppResult<Token>) -> bool {
+        !matches!(res, Ok(Locatable {
+            data: Token::Whitespace(_),
+            ..
+        }))
     }
 
     /// If at the start of the line and we see `#directive`, return that directive.
@@ -540,14 +547,14 @@ impl<'a> PreProcessor<'a> {
             Pragma => {
                 self.error_handler
                     .warn(WarningDiagnostic::IgnoredPragma, self.span(start));
-                drop(self.tokens_until_newline());
+                drop(self.tokens_until_newline(false));
                 Ok(())
             }
             // NOTE: #warning is a non-standard extension, but is implemented
             // by most major compilers including clang and gcc.
             Warning => {
                 let tokens: Vec<_> = self
-                    .tokens_until_newline()
+                    .tokens_until_newline(false)
                     .into_iter()
                     .map(|res| res.map(|l| l.data))
                     .collect::<Result<_, _>>()?;
@@ -557,7 +564,7 @@ impl<'a> PreProcessor<'a> {
             }
             Error => {
                 let tokens: Vec<_> = self
-                    .tokens_until_newline()
+                    .tokens_until_newline(false)
                     .into_iter()
                     .map(|res| res.map(|l| l.data))
                     .collect::<Result<_, _>>()?;
@@ -570,7 +577,7 @@ impl<'a> PreProcessor<'a> {
                     WarningDiagnostic::Generic("#line is not yet implemented".into()),
                     self.span(start),
                 );
-                drop(self.tokens_until_newline());
+                drop(self.tokens_until_newline(false));
                 Ok(())
             }
             Include => self.include(start),
@@ -580,7 +587,7 @@ impl<'a> PreProcessor<'a> {
     fn boolean_expr(&mut self) -> Result<bool, CompileError> {
         let start = self.file_processor.offset();
         let lex_tokens: Vec<_> = self
-            .tokens_until_newline()
+            .tokens_until_newline(false)
             .into_iter()
             .collect::<Result<_, CompileError>>()?;
         let location = self.span(start);
@@ -703,6 +710,7 @@ impl<'a> PreProcessor<'a> {
             .into_iter()
             .map(|t| replace(definitions, t.data, std::iter::empty(), t.location))
             .flatten()
+            .filter(PreProcessor::is_not_whitespace)
             .map(|mut token| {
                 if let Ok(tok) = &mut token {
                     expr_location = Some(location.maybe_merge(expr_location));
@@ -883,18 +891,9 @@ impl<'a> PreProcessor<'a> {
     // `#define f (a) - object macro
     fn define(&mut self, start: u32) -> Result<(), Locatable<Error>> {
         let body = |this: &mut PreProcessor| {
-            this.tokens_until_newline()
+            this.tokens_until_newline(true)
                 .into_iter()
                 .map(|res| res.map(|loc| loc.data))
-                .enumerate()
-                .map(|(i, x)| {
-                    if i == 0 {
-                        vec![x]
-                    } else {
-                        vec![x, Ok(Token::Whitespace(String::from(" ")))]
-                    }
-                })
-                .flatten()
                 .collect::<Result<Vec<_>, Locatable<Error>>>()
         };
 
@@ -1254,16 +1253,15 @@ mod tests {
             _ => panic!("not a keyword: {:?}", token),
         }
     }
+    fn _is_not_whitespace(res: &CompileResult<Token>) -> bool {
+        !matches!(res, Ok(Token::Whitespace(_)))
+    }
     fn is_same_preprocessed(xs: PreProcessor, ys: PreProcessor) -> bool {
-        let xs = xs
-            .map(|res| res.map(|token| token.data))
-            .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
-            .collect::<Vec<_>>();
-        let ys = ys
+        let to_vec = |xs: PreProcessor| xs
             .map(|res| res.map(|token| token.data))
-            .filter(|res| !matches!(res, Ok(Token::Whitespace(_))))
+            .filter(_is_not_whitespace)
             .collect::<Vec<_>>();
-        xs == ys
+        to_vec(xs) == to_vec(ys)
     }
     fn assert_same(src: &str, cpp_src: &str) {
         assert!(
diff --git a/src/lex/files.rs b/src/lex/files.rs
index c79edd8f..041917a9 100644
--- a/src/lex/files.rs
+++ b/src/lex/files.rs
@@ -138,13 +138,20 @@ impl FileProcessor {
 
     /// Return all tokens from the current position until the end of the current line.
     ///
+    /// * `whitespace` - whether or not to include whitespace tokens
+    ///
     /// Note that these are _tokens_ and not bytes, so if there are invalid tokens
     /// on the current line, this will return a lex error.
-    pub(super) fn tokens_until_newline(&mut self) -> Vec<CompileResult<Locatable<Token>>> {
+    pub(super) fn tokens_until_newline(
+        &mut self,
+        whitespace: bool,
+    ) -> Vec<CompileResult<Locatable<Token>>> {
         let mut tokens = Vec::new();
         let line = self.line();
         loop {
-            self.consume_whitespace();
+            let ws_start = self.offset();
+            let ws = self.consume_whitespace();
+            let ws_span = self.span(ws_start);
             if self.line() != line {
                 // lines should end with a newline, but in case they don't, don't crash
                 assert!(!self.lexer().seen_line_token || self.lexer_mut().peek().is_none(),
@@ -152,6 +159,12 @@ impl FileProcessor {
                     self.lexer_mut().peek());
                 break;
             }
+            if whitespace && !ws.is_empty() {
+                tokens.push(Ok(Locatable {
+                    data: Token::Whitespace(ws), // NOTE: in clang, this is one space
+                    location: ws_span,
+                }));
+            }
             match self.next() {
                 Some(token) => tokens.push(token),
                 None => break,

From c1aa84a90bdca994aa6ce2f8b899af4e08f54dae Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 00:09:38 -0400
Subject: [PATCH 15/24] De Morgan

---
 src/lex/mod.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index bf1554e7..4895ad5e 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -905,8 +905,8 @@ impl Iterator for Lexer {
                         .with(LexError::UnknownToken(x as char))));
                 }
             };
-            // We've seen a token if this isn't # and this isn't whitespace
-            self.seen_line_token |= data != Token::Hash && !matches!(data, Token::Whitespace(_));
+            // We've seen a token if this isn't # or whitespace
+            self.seen_line_token |= !(data == Token::Hash || matches!(data, Token::Whitespace(_)));
             Some(Ok(Locatable {
                 data,
                 location: self.span(span_start),

From 81d47d9f9ab4d0e7103c77d993a4b015fa2dc4a5 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 00:30:16 -0400
Subject: [PATCH 16/24] cargo fmt

---
 src/lex/cpp.rs | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 77bb8442..bfd67abb 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -415,10 +415,13 @@ impl<'a> PreProcessor<'a> {
     }
 
     fn is_not_whitespace(res: &CppResult<Token>) -> bool {
-        !matches!(res, Ok(Locatable {
-            data: Token::Whitespace(_),
-            ..
-        }))
+        !matches!(
+            res,
+            Ok(Locatable {
+                data: Token::Whitespace(_),
+                ..
+            })
+        )
     }
 
     /// If at the start of the line and we see `#directive`, return that directive.
@@ -1257,10 +1260,11 @@ mod tests {
         !matches!(res, Ok(Token::Whitespace(_)))
     }
     fn is_same_preprocessed(xs: PreProcessor, ys: PreProcessor) -> bool {
-        let to_vec = |xs: PreProcessor| xs
-            .map(|res| res.map(|token| token.data))
-            .filter(_is_not_whitespace)
-            .collect::<Vec<_>>();
+        let to_vec = |xs: PreProcessor| {
+            xs.map(|res| res.map(|token| token.data))
+                .filter(_is_not_whitespace)
+                .collect::<Vec<_>>()
+        };
         to_vec(xs) == to_vec(ys)
     }
     fn assert_same(src: &str, cpp_src: &str) {

From bbbb3ab68ec269c09a00b02becb4e36429bbc2b0 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 00:43:56 -0400
Subject: [PATCH 17/24] Add a few tests for preprocess only with exact matching

TODO add tests involving preprocessor stuff
---
 src/lex/cpp.rs | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index bfd67abb..d83a33b0 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1275,6 +1275,15 @@ mod tests {
             cpp_src,
         );
     }
+    fn assert_same_exact(src: &str, cpp_src: &str) {
+        // NOTE make sure `cpp_src` has a trailing newline
+        let pprint = cpp(src)
+            .filter_map(|res| res.ok().map(|token| token.data))
+            .map(|res| format!("{}", res))
+            .collect::<Vec<_>>()
+            .join("");
+        assert_eq!(pprint, format!("{}\n", cpp_src)); // Because `cpp` adds newline, do it here too
+    }
     #[test]
     fn keywords() {
         for keyword in KEYWORDS.values() {
@@ -1580,4 +1589,12 @@ int main(){}
         ";
         assert_same(original, "a(1)");
     }
+    #[test]
+    // https://github.com/jyn514/rcc/issues/356
+    fn preprocess_only() {
+        assert_same_exact("int \t\n\r     main() {}", "int \t\n\r     main() {}");
+        assert_same_exact("int/* */main() {}", "int main() {}");
+        assert_same_exact("int/*\n\n\n*/main() {}", "int\n\n\nmain() {}");
+        // TODO add tests for `assert_same_exact` with preprocessor macros
+    }
 }

From 46b4943f0412cc23c392088109603354ec526b69 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 21:04:51 -0400
Subject: [PATCH 18/24] Make Whitespace matches consistently use ..

---
 src/lex/cpp.rs   | 2 +-
 src/lex/files.rs | 2 +-
 src/lex/mod.rs   | 2 +-
 src/parse/mod.rs | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index d83a33b0..89d83a93 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1097,7 +1097,7 @@ impl<'a> PreProcessor<'a> {
             match self.next() {
                 Some(Ok(Locatable {
                     data: Token::Whitespace(_),
-                    location: _,
+                    ..
                 })) => continue,
                 other => break other,
             }
diff --git a/src/lex/files.rs b/src/lex/files.rs
index 041917a9..8f2f6dd5 100644
--- a/src/lex/files.rs
+++ b/src/lex/files.rs
@@ -179,7 +179,7 @@ impl FileProcessor {
             match self.next() {
                 Some(Ok(Locatable {
                     data: Token::Whitespace(_),
-                    location: _,
+                    ..
                 })) => continue,
                 other => break other,
             }
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index 4895ad5e..3927cbe6 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -672,7 +672,7 @@ impl Lexer {
             match self.next() {
                 Some(Ok(Locatable {
                     data: Token::Whitespace(_),
-                    location: _,
+                    ..
                 })) => continue,
                 other => break other,
             }
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 0cd8d53d..7aed6671 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -150,7 +150,7 @@ impl<I: Lexer> Parser<I> {
             match self.tokens.next() {
                 Some(Ok(Locatable {
                     data: Token::Whitespace(_),
-                    location: _,
+                    ..
                 })) => continue,
                 Some(Ok(mut token)) => {
                     self.last_location = token.location;

From 78ad8fc35cf58d44d6a63fb2e919465dbd1d4f58 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 21:10:39 -0400
Subject: [PATCH 19/24] Fixed issue with whitespace at beginning of
 analyze::test::lol in `parse_all`

---
 src/analyze/mod.rs | 3 ++-
 src/parse/mod.rs   | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/analyze/mod.rs b/src/analyze/mod.rs
index b0d8f159..ec7d7f64 100644
--- a/src/analyze/mod.rs
+++ b/src/analyze/mod.rs
@@ -1925,7 +1925,8 @@ pub(crate) mod test {
     }
     #[test]
     fn lol() {
-        let lol = "int *jynelson(int(*fp)(int)) {
+        let lol = "
+int *jynelson(int(*fp)(int)) {
     return 0;
 }
 int f(int i) {
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 7aed6671..615843bf 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -363,7 +363,7 @@ pub(crate) mod test {
     pub(crate) fn parser(input: &str) -> Parser<PreProcessor> {
         //let mut lexer = Lexer::new((), format!("{}\n", input), false);
         let mut lexer = cpp(input);
-        let first: Locatable<Token> = lexer.next().unwrap().unwrap();
+        let first: Locatable<Token> = lexer.next_non_whitespace().unwrap().unwrap();
         Parser::new(first, lexer, false)
     }
 

From 8c4f56873adedad776e27782bc7b27c8793cf8ce Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 21:15:19 -0400
Subject: [PATCH 20/24] Clean up filter_map thanks to @jyn514

---
 src/lex/cpp.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 89d83a93..5a19b025 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1278,8 +1278,7 @@ mod tests {
     fn assert_same_exact(src: &str, cpp_src: &str) {
         // NOTE make sure `cpp_src` has a trailing newline
         let pprint = cpp(src)
-            .filter_map(|res| res.ok().map(|token| token.data))
-            .map(|res| format!("{}", res))
+            .filter_map(|res| res.ok().map(|token| token.data.to_string()))
             .collect::<Vec<_>>()
             .join("");
         assert_eq!(pprint, format!("{}\n", cpp_src)); // Because `cpp` adds newline, do it here too

From 0e055b9d16a21ef2b34df0fdf441534b4c788b2b Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 22:57:11 -0400
Subject: [PATCH 21/24] Get tokens_until_newline to do what its name suggests
 and other whitespace improvements for preprocessor macros

---
 src/lex/cpp.rs   | 17 ++++++++++-------
 src/lex/files.rs | 17 ++++++++---------
 src/lex/mod.rs   | 38 +++++++++++++++++++++++++++++---------
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 5a19b025..b2a3b5fc 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -896,6 +896,7 @@ impl<'a> PreProcessor<'a> {
         let body = |this: &mut PreProcessor| {
             this.tokens_until_newline(true)
                 .into_iter()
+                .skip(1)  // First is always unwanted whitespace
                 .map(|res| res.map(|loc| loc.data))
                 .collect::<Result<Vec<_>, Locatable<Error>>>()
         };
@@ -934,8 +935,8 @@ impl<'a> PreProcessor<'a> {
     // `#include "file"` - local include, but falls back to system include if `file` is not found.
     fn include(&mut self, start: u32) -> Result<(), Locatable<Error>> {
         use crate::data::lex::ComparisonToken;
+        self.consume_whitespace_oneline(start, CppError::EmptyInclude)?;
         let lexer = self.lexer_mut();
-        lexer.consume_whitespace();
         let local = if lexer.match_next(b'"') {
             true
         } else if lexer.match_next(b'<') {
@@ -1484,20 +1485,20 @@ d
     #[test]
     fn pragma() {
         let src = "#pragma gcc __attribute__((inline))";
-        assert!(cpp(src).next().is_none());
+        assert!(cpp(src).next_non_whitespace().is_none());
     }
     #[test]
     fn line() {
         let src = "#line 1";
         let mut cpp = cpp(src);
-        assert!(cpp.next().is_none());
+        assert!(cpp.next_non_whitespace().is_none());
         assert!(cpp.warnings().pop_front().is_some());
     }
     #[test]
     fn warning() {
         let src = "#warning your pants are on file";
         let mut cpp = cpp(src);
-        assert!(cpp.next().is_none());
+        assert!(cpp.next_non_whitespace().is_none());
         assert!(cpp.warnings().pop_front().is_some());
     }
     #[test]
@@ -1508,8 +1509,8 @@ d
     fn invalid_directive() {
         assert_err!("#wrong", CppError::InvalidDirective, "invalid directive",);
         assert_err!("#1", CppError::UnexpectedToken(_, _), "unexpected token",);
-        assert_err!("#include", CppError::EndOfFile(_), "end of file");
-        assert_err!("#if defined", CppError::EndOfFile(_), "end of file");
+        assert_err!("#include", CppError::EmptyInclude, "empty include");
+        assert_err!("#if defined", CppError::EndOfFile(_), "unexpected eof");
         for s in &[
             "#if defined()",
             "#if defined(+)",
@@ -1594,6 +1595,8 @@ int main(){}
         assert_same_exact("int \t\n\r     main() {}", "int \t\n\r     main() {}");
         assert_same_exact("int/* */main() {}", "int main() {}");
         assert_same_exact("int/*\n\n\n*/main() {}", "int\n\n\nmain() {}");
-        // TODO add tests for `assert_same_exact` with preprocessor macros
+        assert_same_exact("#define a(c) c\tc\na(1);a(2)", "\n1\t1;2\t2");
+        // assert_same_exact("#define a //\n#if defined a\n  x\n#endif", "\n\n  x");
+        // TODO add mote tests for `assert_same_exact` with preprocessor macros
     }
 }
diff --git a/src/lex/files.rs b/src/lex/files.rs
index 8f2f6dd5..fea21fb1 100644
--- a/src/lex/files.rs
+++ b/src/lex/files.rs
@@ -123,6 +123,10 @@ impl FileProcessor {
         self.lexer_mut().consume_whitespace()
     }
     #[inline]
+    pub(super) fn consume_whitespace_preprocessor(&mut self) -> String {
+        self.lexer_mut().consume_whitespace_preprocessor()
+    }
+    #[inline]
     pub(super) fn seen_line_token(&self) -> bool {
         self.lexer().seen_line_token
     }
@@ -147,24 +151,19 @@ impl FileProcessor {
         whitespace: bool,
     ) -> Vec<CompileResult<Locatable<Token>>> {
         let mut tokens = Vec::new();
-        let line = self.line();
         loop {
             let ws_start = self.offset();
-            let ws = self.consume_whitespace();
+            let ws = self.consume_whitespace_preprocessor();
             let ws_span = self.span(ws_start);
-            if self.line() != line {
-                // lines should end with a newline, but in case they don't, don't crash
-                assert!(!self.lexer().seen_line_token || self.lexer_mut().peek().is_none(),
-                    "expected `tokens_until_newline()` to reset `seen_line_token`, but `lexer.peek()` is {:?}",
-                    self.lexer_mut().peek());
-                break;
-            }
             if whitespace && !ws.is_empty() {
                 tokens.push(Ok(Locatable {
                     data: Token::Whitespace(ws), // NOTE: in clang, this is one space
                     location: ws_span,
                 }));
             }
+            if self.lexer_mut().peek().unwrap_or(b'\n') == b'\n' {
+                break;
+            }
             match self.next() {
                 Some(token) => tokens.push(token),
                 None => break,
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
index 3927cbe6..93fad215 100644
--- a/src/lex/mod.rs
+++ b/src/lex/mod.rs
@@ -186,17 +186,35 @@ impl Lexer {
             file: self.location.file,
         }
     }
+
+    #[inline]
+    fn consume_whitespace(&mut self) -> String {
+        self.consume_whitespace_full(false, true)
+    }
+    #[inline]
+    fn consume_whitespace_preprocessor(&mut self) -> String {
+        self.consume_whitespace_full(true, false)
+    }
     /// Remove all consecutive whitespace pending in the stream.
     /// This includes comments.
     ///
+    /// If `stop_at_newline` this stops at the end of the line (unless there's a comment)
+    /// If `comments_newlines` then multiline comments are replaced with their newlines else space
+    ///
     /// Before: b"    // some comment\n /*multi comment*/hello   "
     /// After:  b"hello   "
-    fn consume_whitespace(&mut self) -> String {
+    fn consume_whitespace_full(
+        &mut self,
+        stop_at_newline: bool,
+        comments_newlines: bool,
+    ) -> String {
         // there may be comments following whitespace
         let mut whitespace = String::new();
         loop {
             // whitespace
-            while self.peek().map_or(false, |c| c.is_ascii_whitespace()) {
+            while self.peek().map_or(false, |c| {
+                c.is_ascii_whitespace() && !(stop_at_newline && c == b'\n')
+            }) {
                 if let Some(c) = self.next_char() {
                     whitespace.push(c.into());
                 }
@@ -204,15 +222,15 @@ impl Lexer {
             // comments
             if self.peek() == Some(b'/') {
                 match self.peek_next() {
-                    Some(b'/') => {
-                        self.consume_line_comment();
-                        whitespace.push('\n');
-                    }
+                    Some(b'/') => self.consume_line_comment(),
                     Some(b'*') => {
                         self.next_char();
                         self.next_char();
                         match self.consume_multi_comment() {
-                            Ok(ws) => whitespace.push_str(&ws),
+                            Ok(ws) => {
+                                let ws = if comments_newlines { &ws } else { " " };
+                                whitespace.push_str(ws)
+                            }
                             Err(err) => self.error_handler.push_back(err),
                         }
                     }
@@ -230,9 +248,11 @@ impl Lexer {
     /// After:  chars{"hello // blah"}
     fn consume_line_comment(&mut self) {
         loop {
-            match self.next_char() {
+            match self.peek() {
                 None | Some(b'\n') => return,
-                _ => {}
+                _ => {
+                    self.next_char();
+                }
             }
         }
     }

From 3a9cd4aa03840060ef511f5bfaf8057287b12b2f Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 23:34:14 -0400
Subject: [PATCH 22/24] More preprocess_only tests

---
 src/lex/cpp.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index b2a3b5fc..802e3d3e 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1596,7 +1596,52 @@ int main(){}
         assert_same_exact("int/* */main() {}", "int main() {}");
         assert_same_exact("int/*\n\n\n*/main() {}", "int\n\n\nmain() {}");
         assert_same_exact("#define a(c) c\tc\na(1);a(2)", "\n1\t1;2\t2");
-        // assert_same_exact("#define a //\n#if defined a\n  x\n#endif", "\n\n  x");
-        // TODO add mote tests for `assert_same_exact` with preprocessor macros
+        assert_same_exact("#define a //\n#if defined a\n  x\n#endif", "\n\n  x\n");
+        assert_same_exact("#define x\n#undef x\n  x", "\n\n  x");
+        assert_same_exact("#pragma once\n  x", "\n  x");
+        assert_same_exact("#warning dont panic\n  x", "\n  x");
+        assert_same_exact("#error dont panic\n  x", "\n  x");
+        assert_same_exact("#line 1\n  x", "\n  x");
+        assert_same_exact(
+            "---
+#define a
+---
+#if 1
+  x
+  y
+  z
+#endif
+---
+#if 0
+  x
+#endif
+---
+#ifdef a
+  x
+#endif
+---
+#ifndef a
+  x
+#endif
+---",
+            "---
+
+---
+
+  x
+  y
+  z
+
+---
+
+---
+
+  x
+
+---
+
+---",
+        );
+        // TODO test for #includes
     }
 }

From 6d084cd61f2e1b5a6e63d7dc18c68c3df80eb979 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Sun, 24 May 2020 23:48:31 -0400
Subject: [PATCH 23/24] Merge two versions of `is_not_whitespace`

---
 src/lex/cpp.rs | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index 802e3d3e..c02d72b4 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -1257,13 +1257,10 @@ mod tests {
             _ => panic!("not a keyword: {:?}", token),
         }
     }
-    fn _is_not_whitespace(res: &CompileResult<Token>) -> bool {
-        !matches!(res, Ok(Token::Whitespace(_)))
-    }
     fn is_same_preprocessed(xs: PreProcessor, ys: PreProcessor) -> bool {
         let to_vec = |xs: PreProcessor| {
-            xs.map(|res| res.map(|token| token.data))
-                .filter(_is_not_whitespace)
+            xs.filter(PreProcessor::is_not_whitespace)
+                .map(|res| res.map(|token| token.data))
                 .collect::<Vec<_>>()
         };
         to_vec(xs) == to_vec(ys)

From 0d03378729800deb07fe6cb46964921b444dc6f0 Mon Sep 17 00:00:00 2001
From: Hunter Damron <hdamron1594@yahoo.com>
Date: Mon, 25 May 2020 00:18:18 -0400
Subject: [PATCH 24/24] Do not assume there is whitespace between define id and
 body

---
 src/lex/cpp.rs | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/lex/cpp.rs b/src/lex/cpp.rs
index c02d72b4..df726862 100644
--- a/src/lex/cpp.rs
+++ b/src/lex/cpp.rs
@@ -414,8 +414,8 @@ impl<'a> PreProcessor<'a> {
         self.file_processor.tokens_until_newline(whitespace)
     }
 
-    fn is_not_whitespace(res: &CppResult<Token>) -> bool {
-        !matches!(
+    fn is_whitespace(res: &CppResult<Token>) -> bool {
+        matches!(
             res,
             Ok(Locatable {
                 data: Token::Whitespace(_),
@@ -423,6 +423,9 @@ impl<'a> PreProcessor<'a> {
             })
         )
     }
+    fn is_not_whitespace(res: &CppResult<Token>) -> bool {
+        !PreProcessor::is_whitespace(res)
+    }
 
     /// If at the start of the line and we see `#directive`, return that directive.
     /// Otherwise, if we see a token (or error), return that error.
@@ -896,7 +899,7 @@ impl<'a> PreProcessor<'a> {
         let body = |this: &mut PreProcessor| {
             this.tokens_until_newline(true)
                 .into_iter()
-                .skip(1)  // First is always unwanted whitespace
+                .skip_while(PreProcessor::is_whitespace)  // TODO warning if nothing skips
                 .map(|res| res.map(|loc| loc.data))
                 .collect::<Result<Vec<_>, Locatable<Error>>>()
         };