diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..2a0b65e
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+    "[rust]": {
+        "editor.defaultFormatter": "rust-lang.rust-analyzer"
+    }
+}
diff --git a/generator/src/main.rs b/generator/src/main.rs
index 7ffd32c..f35cf99 100644
--- a/generator/src/main.rs
+++ b/generator/src/main.rs
@@ -3,6 +3,8 @@ extern crate markdown;
 use std::fs;
 use std::path::Path;
 
+use markdown::markdown_to_html;
+
 trait Generator {
     fn get_md_files(&self) -> Vec<String>;
     fn generate_styles(&self);
@@ -227,8 +229,8 @@ impl Generator for HtmlGenerator {
                 }
             };
 
-            let tokens = markdown::tokenize(&md);
-            let rendered_html = markdown::render_html(tokens);
+            let md = markdown::remove_frontmatter(&md);
+            let rendered_html = markdown_to_html(&md);
             let content = markdown::html_to_string(rendered_html);
             let content = truncate_to_char_boundary(&content, 300);
 
@@ -281,8 +283,9 @@ impl Generator for HtmlGenerator {
                     continue;
                 }
             };
-            let tokens = markdown::tokenize(&md);
-            let html = markdown::render_html(tokens);
+
+            let md = markdown::remove_frontmatter(&md);
+            let html = markdown_to_html(&md);
 
             let html = self.post_template(html, title, created_at);
             let pathname = md_file.replace(".md", "");
diff --git a/markdown/src/generator.rs b/markdown/src/generator.rs
new file mode 100644
index 0000000..84da168
--- /dev/null
+++ b/markdown/src/generator.rs
@@ -0,0 +1,244 @@
+use std::collections::VecDeque;
+
+use crate::token::Token;
+
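+/// Render a stream of markdown tokens as an HTML string. Each token is
+/// emitted with a trailing newline.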
+pub fn generate_html(tokens: VecDeque<Token>) -> String {
+    let mut output = String::new();
+
+    for token in tokens {
+        match token {
+            Token::Heading { level, content } => {
+                output.push_str(&format!("<h{}>{}</h{}>\n", level, content, level));
+            }
+            Token::ListItem(content) => {
+                for token in content {
+                    match token {
+                        Token::Text(text) => {
+                            output.push_str(&format!("<li>{}</li>\n", text));
+                        }
+                        Token::Link { text, url } => {
+                            output
+                                .push_str(&format!("<li><a href=\"{}\">{}</a></li>\n", url, text));
+                        }
+                        _ => {}
+                    }
+                }
+            }
+            Token::Paragraph(content) => {
+                output.push_str(&format!("<p>{}</p>\n", content));
+            }
+            Token::Bold(content) => {
+                output.push_str(&format!("<strong>{}</strong>\n", content));
+            }
+            Token::Italic(content) => {
+                output.push_str(&format!("<em>{}</em>\n", content));
+            }
+            Token::Link { text, url } => {
+                output.push_str(&format!("<a href=\"{}\">{}</a>\n", url, text));
+            }
+            Token::CodeBlock { language, content } => {
+                output.push_str(&format!(
+                    "<pre><code class=\"language-{}\">{}</code></pre>\n",
+                    language, content
+                ));
+            }
+            Token::InlineCode(content) => {
+                output.push_str(&format!("<code>{}</code>\n", content));
+            }
+            Token::BlockQuote(content) => {
+                output.push_str(&format!("<blockquote>{}</blockquote>\n", content));
+            }
+            Token::Image { src, alt } => {
+                output.push_str(&format!("<img src=\"{}\" alt=\"{}\" />\n", src, alt));
+            }
+            Token::Break => {
+                output.push_str("<br />\n");
+            }
+            Token::Text(content) => {
+                output.push_str(&format!("{}\n", content));
+            }
+            Token::LinkCard(content) => {
+                output.push_str(&format!(
+                    r#"<div class="link-card" data-url="{}">
+<a href="{}">
+<div class="link-card-content">
+<p class="link-card-title">loading...</p>
+</div>
+</a>
+</div>
+"#,
+                    content, content,
+                ));
+            }
+            Token::Twitter(_) => {
+                // output.push_str(&format!(
+                //     "<blockquote class=\"twitter-tweet\"><a href=\"{}\"></a></blockquote>\n",
+                //     content
+                // ));
+                output.push_str("<p>twitter card is not supported anymore...</p>\n")
+            }
+            _ => {}
+        }
+    }
+
+    output
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_generate_html_heading() {
+        let tokens = VecDeque::from(vec![Token::Heading {
+            level: 1,
+            content: "Title".to_string(),
+        }]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<h1>Title</h1>\n");
+    }
+
+    #[test]
+    fn test_generate_html_list() {
+        let tokens = VecDeque::from(vec![Token::ListItem(vec![Token::Text(
+            "Item 1".to_string(),
+        )])]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<li>Item 1</li>\n");
+    }
+
+    #[test]
+    fn test_generate_html_paragraph() {
+        let tokens = VecDeque::from(vec![Token::Paragraph("Normal text here.".to_string())]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<p>Normal text here.</p>\n");
+    }
+
+    #[test]
+    fn test_generate_html_bold() {
+        let tokens = VecDeque::from(vec![Token::Bold("Bold text".to_string())]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<strong>Bold text</strong>\n");
+    }
+
+    #[test]
+    fn test_generate_html_italic() {
+        let tokens = VecDeque::from(vec![Token::Italic("Italic text".to_string())]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<em>Italic text</em>\n");
+    }
+
+    #[test]
+    fn test_generate_html_link() {
+        let tokens = VecDeque::from(vec![Token::Link {
+            text: "Link text".to_string(),
+            url: "https://example.com".to_string(),
+        }]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<a href=\"https://example.com\">Link text</a>\n");
+    }
+
+    #[test]
+    fn test_generate_html_code_block() {
+        let tokens = VecDeque::from(vec![Token::CodeBlock {
+            language: "rust".to_string(),
+            content: "fn main() {\n    println!(\"Hello, world!\");\n}\n".to_string(),
+        }]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<pre><code class=\"language-rust\">fn main() {\n    println!(\"Hello, world!\");\n}\n</code></pre>\n");
+    }
+
+    #[test]
+    fn test_generate_html_inline_code() {
+        let tokens = VecDeque::from(vec![Token::InlineCode("let x = 42;".to_string())]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<code>let x = 42;</code>\n");
+    }
+
+    #[test]
+    fn test_generate_html_block_quote() {
+        let tokens = VecDeque::from(vec![Token::BlockQuote(
+            "This is a block quote.".to_string(),
+        )]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<blockquote>This is a block quote.</blockquote>\n");
+    }
+
+    #[test]
+    fn test_generate_html_image() {
+        let tokens = VecDeque::from(vec![Token::Image {
+            src: "https://example.com/image.jpg".to_string(),
+            alt: "Alt text".to_string(),
+        }]);
+        let html = generate_html(tokens);
+        assert_eq!(
+            html,
+            "<img src=\"https://example.com/image.jpg\" alt=\"Alt text\" />\n"
+        );
+    }
+
+    #[test]
+    fn test_generate_html_link_card() {
+        let tokens = VecDeque::from(vec![Token::LinkCard("https://example.com".to_string())]);
+        let html = generate_html(tokens);
+        assert_eq!(
+            html,
+            r#"<div class="link-card" data-url="https://example.com">
+<a href="https://example.com">
+<div class="link-card-content">
+<p class="link-card-title">loading...</p>
+</div>
+</a>
+</div>
+"#
+        );
+    }
+
+    #[test]
+    fn test_generate_html_twitter() {
+        let tokens = VecDeque::from(vec![Token::Twitter(
+            "https://twitter.com/foo/status/11111111".to_string(),
+        )]);
+        let html = generate_html(tokens);
+        assert_eq!(html, "<p>twitter card is not supported anymore...</p>\n");
+    }
+
+    #[test]
+    fn test_generate_html() {
+        let tokens = VecDeque::from(vec![
+            Token::Heading {
+                level: 1,
+                content: "Title".to_string(),
+            },
+            Token::Heading {
+                level: 2,
+                content: "Subtitle".to_string(),
+            },
+            Token::Heading {
+                level: 3,
+                content: "Sub-subtitle".to_string(),
+            },
+            Token::ListItem(vec![
+                Token::Text("Item 1".to_string()),
+                Token::Text("Item 2".to_string()),
+                Token::Text("Item 3".to_string()),
+            ]),
+            // Token::ListItem("Item 2".to_string()),
+            // Token::ListItem("Item 3".to_string()),
+            Token::Paragraph("Normal text here.".to_string()),
+        ]);
+        let html = generate_html(tokens);
+        assert_eq!(
+            html,
+            "<h1>Title</h1>\n<h2>Subtitle</h2>\n<h3>Sub-subtitle</h3>\n<li>Item 1</li>\n<li>Item 2</li>\n<li>Item 3</li>\n<p>Normal text here.</p>\n"
+        );
+    }
+}
diff --git a/markdown/src/lexer.rs b/markdown/src/lexer.rs
new file mode 100644
index 0000000..12ecf94
--- /dev/null
+++ b/markdown/src/lexer.rs
@@ -0,0 +1,642 @@
+use crate::token::Token;
+
+pub struct Lexer<'a> {
+    input: &'a str,
+    position: usize,
+    read_position: usize,
+    ch: Option<char>,
+}
+
+impl<'a> Lexer<'a> {
+    pub fn new(input: &'a str) -> Self {
+        let mut lexer = Lexer {
+            input,
+            position: 0,
+            read_position: 0,
+            ch: None,
+        };
+        lexer.read_char();
+        lexer
+    }
+
+    fn read_char(&mut self) {
+        self.ch = if self.read_position >= self.input.len() {
+            None
+        } else {
+            Some(self.input[self.read_position..].chars().next().unwrap())
+        };
+        self.position = self.read_position;
+        self.read_position += self.ch.map_or(1, |ch| ch.len_utf8());
+    }
+
+    fn peek_char(&self) -> Option<char> {
+        if self.read_position >= self.input.len() {
+            None
+        } else {
+            Some(self.input[self.read_position..].chars().next().unwrap())
+        }
+    }
+
+    pub fn next_token(&mut self) -> Option<Token> {
+        self.skip_whitespace();
+
+        match self.ch {
+            Some('#') => Some(self.read_heading()),
+            Some('*') => {
+                if self.peek_char() == Some(' ') {
+                    Some(self.read_list())
+                } else {
+                    Some(self.read_italic_or_bold())
+                }
+            }
+            Some('[') => Some(self.read_link()),
+            Some('-') => {
+                if self.peek_char() == Some(' ') {
+                    Some(self.read_list())
+                } else {
+                    Some(self.read_text())
+                }
+            }
+            Some('`') => Some(self.read_code_block_or_inline_code()),
+            Some('>') => Some(self.read_quote()),
+            Some('!') => {
+                if self.peek_char() == Some('[') {
+                    Some(self.read_image())
+                } else {
+                    Some(self.read_text())
+                }
+            }
+            // Some('1') => {
+            //     if self.peek_char() == Some('.') {
+            //         Some(self.read_ordered_list())
+            //     } else {
+            //         Some(self.read_text())
+            //     }
+            // }
+            Some(' ') => {
+                if self.peek_char() == Some(' ') {
+                    Some(self.read_break())
+                } else {
+                    Some(self.read_text())
+                }
+            }
+
+            // Custom syntax from here on
+            Some('@') => {
+                // Check whether this is the @og[URL] form
+                if self.peek_char() == Some('o') {
+                    Some(self.read_link_card())
+                } else if self.peek_char() == Some('t') {
+                    Some(self.read_twitter())
+                } else {
+                    Some(self.read_text())
+                }
+            }
+            Some(_) => Some(self.read_text()),
+            None => None,
+        }
+    }
+
+    fn read_heading(&mut self) -> Token {
+        let mut level = 0;
+        while let Some('#') = self.ch {
+            level += 1;
+            self.read_char();
+        }
+
+        self.skip_whitespace();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == '\n' {
+                break;
+            }
+            self.read_char();
+        }
+        Token::Heading {
+            level,
+            content: self.input[start..self.position].to_string(),
+        }
+    }
+
+    fn read_italic_or_bold(&mut self) -> Token {
+        if self.peek_char() == Some('*') {
+            // Consume the second '*'
+            self.read_char();
+            self.read_char();
+            let content_start = self.position;
+            while let Some(ch) = self.ch {
+                if ch == '*' && self.peek_char() == Some('*') {
+                    break;
+                }
+                self.read_char();
+            }
+            let token = Token::Bold(self.input[content_start..self.position].to_string());
+            self.read_char();
+            self.read_char();
+
+            token
+        } else {
+            self.read_char();
+            let content_start = self.position;
+            while let Some(ch) = self.ch {
+                if ch == '*' {
+                    break;
+                }
+                self.read_char();
+            }
+
+            let token = Token::Italic(self.input[content_start..self.position].to_string());
+
+            self.read_char();
+
+            token
+        }
+    }
+
+    fn read_list(&mut self) -> Token {
+        self.read_char();
+        self.read_char();
+        let mut tokens = vec![];
+
+        // "- [link](https://example.com)" becomes ListItem(Token::Link)
+        if self.ch == Some('[') {
+            tokens.push(self.read_link());
+        } else {
+            let start = self.position;
+            while let Some(ch) = self.ch {
+                if ch == '\n' {
+                    break;
+                }
+                self.read_char();
+            }
+            tokens.push(Token::Text(self.input[start..self.position].to_string()));
+        }
+
+        Token::ListItem(tokens)
+    }
+
+    fn read_link(&mut self) -> Token {
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ']' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let text = self.input[start..self.position].to_string();
+
+        // Skip the closing ']' and opening '('
+        self.read_char();
+        self.read_char();
+
+        let link_start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ')' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let url = self.input[link_start..self.position].to_string();
+
+        // Skip the closing ')'
+        self.read_char();
+
+        Token::Link { text, url }
+    }
+
+    fn read_code_block_or_inline_code(&mut self) -> Token {
+        if self.peek_char() == Some('`') {
+            self.read_code_block()
+        } else {
+            self.read_inline_code()
+        }
+    }
+
+    fn read_code_block(&mut self) -> Token {
+        // ```
+        self.read_char();
+        self.read_char();
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == '\n' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let language = self.input[start..self.position].to_string();
+
+        // \n
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            // Stop at the closing fence of three backticks
+            if ch == '`' && self.input[self.position..].starts_with("```") {
+                break;
+            }
+            self.read_char();
+        }
+
+        let content = self.input[start..self.position].to_string();
+
+        self.read_char();
+        self.read_char();
+        self.read_char();
+
+        Token::CodeBlock { language, content }
+    }
+
+    fn read_inline_code(&mut self) -> Token {
+        // `
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == '`' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let content = self.input[start..self.position].to_string();
+
+        // `
+        self.read_char();
+
+        Token::InlineCode(content)
+    }
+
+    fn read_quote(&mut self) -> Token {
+        self.read_char();
+        self.read_char();
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == '\n' {
+                break;
+            }
+            self.read_char();
+        }
+        Token::BlockQuote(self.input[start..self.position].to_string())
+    }
+
+    fn read_image(&mut self) -> Token {
+        self.read_char();
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ']' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let alt = self.input[start..self.position].to_string();
+
+        // Skip the closing ']' and opening '('
+        self.read_char();
+        self.read_char();
+
+        let link_start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ')' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let src = self.input[link_start..self.position].to_string();
+
+        // Skip the closing ')'
+        self.read_char();
+
+        Token::Image { src, alt }
+    }
+
+    fn read_break(&mut self) -> Token {
+        self.read_char();
+        self.read_char();
+        Token::Break
+    }
+
+    fn read_text(&mut self) -> Token {
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == '\n' {
+                break;
+            }
+            self.read_char();
+        }
+        Token::Paragraph(self.input[start..self.position].to_string())
+    }
+
+    // fn read_ordered_list(&mut self) -> Token {}
+
+    fn read_link_card(&mut self) -> Token {
+        let content = self.input[self.position..].to_string();
+        // Custom syntax of the form @og[URL]
+        self.read_char();
+        self.read_char();
+
+        if self.peek_char() != Some('[') {
+            return Token::Paragraph(content);
+        }
+
+        self.read_char();
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ']' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let url = self.input[start..self.position].to_string();
+
+        // ]
+        self.read_char();
+
+        Token::LinkCard(url)
+    }
+
+    // Custom syntax of the form @twitter[URL]
+    fn read_twitter(&mut self) -> Token {
+        let content = self.input[self.position..].to_string();
+        // A bit crude, but skip the seven characters of "twitter"
+        self.read_char();
+        self.read_char();
+        self.read_char();
+        self.read_char();
+        self.read_char();
+        self.read_char();
+        self.read_char();
+
+        if self.peek_char() != Some('[') {
+            return Token::Paragraph(content);
+        }
+
+        self.read_char();
+        self.read_char();
+
+        let start = self.position;
+        while let Some(ch) = self.ch {
+            if ch == ']' {
+                break;
+            }
+            self.read_char();
+        }
+
+        let url = self.input[start..self.position].to_string();
+
+        // ]
+        self.read_char();
+
+        Token::Twitter(url)
+    }
+
+    fn skip_whitespace(&mut self) {
+        while let Some(ch) = self.ch {
+            if !ch.is_whitespace() {
+                break;
+            }
+            self.read_char();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::vec;
+
+    use super::*;
+
+    #[test]
+    fn test_next_token() {
+        let input = "# Title\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Heading {
+                level: 1,
+                content: "Title".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_bold() {
+        let input = "**Bold**\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Bold("Bold".to_string())));
+    }
+
+    #[test]
+    fn test_next_token_italic() {
+        let input = "*Italic*\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Italic("Italic".to_string())));
+    }
+
+    #[test]
+    fn test_next_token_link() {
+        let input = "[Link](https://example.com)\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Link {
+                text: "Link".to_string(),
+                url: "https://example.com".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_paragraph() {
+        let input = "Normal text\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Paragraph("Normal text".to_string())));
+    }
+
+    #[test]
+    fn test_next_token_paragraph2() {
+        let input = "SHA-256\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Paragraph("SHA-256".to_string())));
+    }
+
+    #[test]
+    fn test_next_token_multiple() {
+        let input = "# Title\n**Bold**\n*Italic*\n[Link](https://example.com)\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Heading {
+                level: 1,
+                content: "Title".to_string()
+            })
+        );
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Bold("Bold".to_string())));
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::Italic("Italic".to_string())));
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Link {
+                text: "Link".to_string(),
+                url: "https://example.com".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_whitespace() {
+        let input = "  \n# Title\n  \n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Heading {
+                level: 1,
+                content: "Title".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_empty() {
+        let input = "";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, None);
+    }
+
+    #[test]
+    fn test_next_token_code_block() {
+        let input = "```rust\nfn main() {\n    println!(\"Hello, world!\");\n}\n```\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::CodeBlock {
+                language: "rust".to_string(),
+                content: "fn main() {\n    println!(\"Hello, world!\");\n}\n".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_list() {
+        let input = "* Item 1\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::ListItem(vec![Token::Text("Item 1".to_string())]))
+        );
+    }
+
+    #[test]
+    fn test_next_token_list_with_link() {
+        let input = "* [Link](https://example.com)\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::ListItem(vec![Token::Link {
+                text: "Link".to_string(),
+                url: "https://example.com".to_string()
+            }]))
+        );
+    }
+
+    #[test]
+    fn test_next_token_quote() {
+        let input = "> Quote\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(token, Some(Token::BlockQuote("Quote".to_string())));
+    }
+
+    #[test]
+    fn test_next_token_image() {
+        let input = "![Alt text](https://example.com/image.png)\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Image {
+                src: "https://example.com/image.png".to_string(),
+                alt: "Alt text".to_string()
+            })
+        );
+    }
+
+    #[test]
+    fn test_next_token_link_card() {
+        let input = "@og[https://example.com]\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::LinkCard("https://example.com".to_string()))
+        );
+    }
+
+    #[test]
+    fn test_next_token_twitter() {
+        let input = "@twitter[https://example.com]\n";
+        let mut lexer = Lexer::new(input);
+        let token = lexer.next_token();
+        assert_eq!(
+            token,
+            Some(Token::Twitter("https://example.com".to_string()))
+        );
+    }
+
+    // #[test]
+    // fn test_next_token_ordered_list() {
Item 1\n"; + // let mut lexer = Lexer::new(input); + // let token = lexer.next_token(); + // assert_eq!(token, Some(Token::ListItem("Item 1".to_string()))); + // } + + #[test] + fn test_next_token_list_multiple() { + let input = "* Item 1\n* Item 2\n"; + let mut lexer = Lexer::new(input); + let token = lexer.next_token(); + assert_eq!( + token, + Some(Token::ListItem(vec![Token::Text("Item 1".to_string())])) + ); + let token = lexer.next_token(); + assert_eq!( + token, + Some(Token::ListItem(vec![Token::Text("Item 2".to_string())])) + ); + } +} diff --git a/markdown/src/lib.rs b/markdown/src/lib.rs index 50fcf2a..b825fcc 100644 --- a/markdown/src/lib.rs +++ b/markdown/src/lib.rs @@ -1,155 +1,7 @@ extern crate regex; use regex::Regex; -use std::collections::{HashMap, VecDeque}; - -#[derive(Debug)] -pub enum Token { - Heading { level: usize, content: String }, - ListItem(String), - OrderedList(Vec), - Bold(String), - Italic(String), - Link { text: String, url: String }, - CodeBlock { language: String, content: String }, - InlineCode(String), - Paragraph(String), - BlockQuote(String), - Image { src: String, alt: String }, - - // === ここから下はカスタムのシンタックス === - LinkCard(String), - Twitter(String), -} - -// MEMO: tokenizer に frontmatter があるのはおかしいので修正する -pub fn tokenize(input: &str) -> VecDeque { - let front_matter_re = Regex::new(r"---\n([\s\S]*?)\n---\n\n([\s\S]*)").unwrap(); - let captures = match front_matter_re.captures(input) { - Some(captures) => captures, - None => { - // 何もしない - return VecDeque::new(); - } - }; - - let input = captures.get(2).unwrap().as_str().to_string(); - let mut tokens = VecDeque::new(); - - let heading_regex = Regex::new(r"^(#{1,6}) (.+)$").unwrap(); - let list_item_regex = Regex::new(r"^\* (.+)$").unwrap(); - let ordered_list_regex = Regex::new(r"^\d+\. 
(.+)$").unwrap(); - let bold_regex = Regex::new(r"\*\*(.+?)\*\*").unwrap(); - let italic_regex = Regex::new(r"\*(.+?)\*").unwrap(); - let link_regex = Regex::new(r"\[(.+?)\]\((.+?)\)").unwrap(); - let code_block_start_regex = Regex::new(r"^```(\w*)").unwrap(); - let inline_code_regex = Regex::new(r"`(.+?)`").unwrap(); - let block_quote_regex = Regex::new(r"^> (.+)").unwrap(); - let image_regex = Regex::new(r"!\[(.+?)\]\((.+?)\)").unwrap(); - - // === ここから下はカスタムのシンタックス === - let link_card = Regex::new(r"^@og\[(.*)\]").unwrap(); - // MEMO: Twitter card、もう正しく動く保証がないので廃止にしてもいいかもしれない - let twitter = Regex::new(r"^@twitter\[(.*)\]").unwrap(); - - let mut in_code_block = false; - let mut code_block_content = String::new(); - let mut code_block_language = String::new(); - - for line in input.lines() { - if in_code_block { - if code_block_start_regex.is_match(line) { - tokens.push_back(Token::CodeBlock { - language: code_block_language.clone(), - content: code_block_content.clone(), - }); - code_block_content.clear(); - code_block_language.clear(); - in_code_block = false; - } else { - code_block_content.push_str(line); - code_block_content.push('\n'); - } - continue; - } - - if code_block_start_regex.is_match(line) { - let captures = code_block_start_regex.captures(line).unwrap(); - code_block_language = captures.get(1).map_or("", |m| m.as_str()).to_string(); - in_code_block = true; - continue; - } - - if heading_regex.is_match(line) { - let captures = heading_regex.captures(line).unwrap(); - let level = captures.get(1).unwrap().as_str().len(); - let content = captures.get(2).unwrap().as_str(); - tokens.push_back(Token::Heading { - level, - content: content.to_string(), - }); - } else if image_regex.is_match(line) { - let captures = image_regex.captures(line).unwrap(); - let alt = captures.get(1).unwrap().as_str(); - let src = captures.get(2).unwrap().as_str(); - tokens.push_back(Token::Image { - src: src.to_string(), - alt: alt.to_string(), - }); - } else if list_item_regex.is_match(line) { - let captures = list_item_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::ListItem(content.to_string())); - } else if ordered_list_regex.is_match(line) { - let captures = ordered_list_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::OrderedList(vec![content.to_string()])); - } else if bold_regex.is_match(line) { - let captures = bold_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::Bold(content.to_string())); - } else if italic_regex.is_match(line) { - let captures = italic_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::Italic(content.to_string())); - } else if link_regex.is_match(line) { - let captures = link_regex.captures(line).unwrap(); - let text = captures.get(1).unwrap().as_str(); - let url = captures.get(2).unwrap().as_str(); - tokens.push_back(Token::Link { - text: text.to_string(), - url: url.to_string(), - }); - } else if inline_code_regex.is_match(line) { - let captures = inline_code_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::InlineCode(content.to_string())); - } else if block_quote_regex.is_match(line) { - let captures = block_quote_regex.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::BlockQuote(content.to_string())); - } else if 
link_card.is_match(line) { - let captures = link_card.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::LinkCard(content.to_string())) - } else if twitter.is_match(line) { - let captures = twitter.captures(line).unwrap(); - let content = captures.get(1).unwrap().as_str(); - tokens.push_back(Token::Twitter(content.to_string())) - } else { - tokens.push_back(Token::Paragraph(line.to_string())); - } - } - - if in_code_block { - tokens.push_back(Token::CodeBlock { - language: code_block_language, - content: code_block_content, - }); - } - - tokens -} +use std::collections::HashMap; pub fn get_frontmatter(input: &str) -> HashMap { let frontmatter_re = Regex::new(r"---\n([\s\S]*?)\n---\n\n([\s\S]*)").unwrap(); @@ -182,415 +34,14 @@ pub fn get_frontmatter(input: &str) -> HashMap { map } -pub fn render_html(tokens: VecDeque) -> String { - let mut output = String::new(); - - for token in tokens { - match token { - Token::Heading { level, content } => { - output.push_str(&format!("{}\n", level, content, level)); - } - Token::ListItem(content) => { - output.push_str(&format!("
-                output.push_str(&format!("<li>{}</li>\n", content));
-            }
-            Token::Paragraph(content) => {
-                output.push_str(&format!("<p>{}</p>\n", content));
-            }
-            Token::Bold(content) => {
-                output.push_str(&format!("<strong>{}</strong>\n", content));
-            }
-            Token::Italic(content) => {
-                output.push_str(&format!("<em>{}</em>\n", content));
-            }
-            Token::Link { text, url } => {
-                output.push_str(&format!("<a href=\"{}\">{}</a>\n", url, text));
-            }
-            Token::CodeBlock { language, content } => {
-                output.push_str(&format!(
-                    "<pre><code class=\"language-{}\">{}</code></pre>\n",
-                    language, content
-                ));
-            }
-            Token::InlineCode(content) => {
-                output.push_str(&format!("<code>{}</code>\n", content));
-            }
-            Token::BlockQuote(content) => {
-                output.push_str(&format!("<blockquote>{}</blockquote>\n", content));
-            }
-            Token::Image { src, alt } => {
-                output.push_str(&format!("<img src=\"{}\" alt=\"{}\" />\n", src, alt));
-            }
-            Token::LinkCard(content) => {
-                output.push_str(&format!(
-                    r#"<div class="link-card" data-url="{}">
-<a href="{}">
-<div class="link-card-content">
-<p class="link-card-title">loading...</p>
-</div>
-</a>
-</div>
-"#,
-                    content, content,
-                ));
-            }
-            Token::Twitter(_) => {
-                // output.push_str(&format!(
-                //     "<blockquote class=\"twitter-tweet\"><a href=\"{}\"></a></blockquote>\n",
-                //     content
-                // ));
-                output.push_str("<p>twitter card is not supported anymore...</p>\n")
-            }
-            _ => {}
-        }
-    }
-
-    output
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_tokenize_heading() {
-        let input = "# Title";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Heading { level, content } => {
-                assert_eq!(*level, 1);
-                assert_eq!(content, "Title");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_tokenize_paragraph() {
-        let input = "Normal text here.";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Paragraph(content) => {
-                assert_eq!(content, "Normal text here.");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_list() {
-        let input = "* Item 1\n";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        println!("{:?}", tokens);
-        match tokens.get(0).unwrap() {
-            Token::ListItem(content) => {
-                assert_eq!(content, "Item 1");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_ordered_list() {
-        let input = "1. Item 1\n";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::OrderedList(items) => {
-                assert_eq!(items.len(), 1);
-                assert_eq!(items[0], "Item 1");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_bold() {
-        let input = "**Bold text**";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Bold(content) => {
-                assert_eq!(content, "Bold text");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_italic() {
-        let input = "*Italic text*";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Italic(content) => {
-                assert_eq!(content, "Italic text");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_link() {
-        let input = "[Link text](https://example.com)";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Link { text, url } => {
-                assert_eq!(text, "Link text");
-                assert_eq!(url, "https://example.com");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_code_block() {
-        let input = "```rust\nfn main() {\n    println!(\"Hello, world!\");\n}\n```";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::CodeBlock { language, content } => {
-                assert_eq!(language, "rust");
-                assert_eq!(
-                    content,
-                    "fn main() {\n    println!(\"Hello, world!\");\n}\n"
-                );
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_inline_code() {
-        let input = "`let x = 42;`";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::InlineCode(content) => {
-                assert_eq!(content, "let x = 42;");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_block_quote() {
-        let input = "> This is a block quote.";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::BlockQuote(content) => {
-                assert_eq!(content, "This is a block quote.");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_image() {
-        let input = "![Alt text](https://example.com/image.jpg)";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Image { src, alt } => {
-                assert_eq!(src, "https://example.com/image.jpg");
-                assert_eq!(alt, "Alt text");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_tokenize_link_card() {
-        let input = "@og[https://example.com]";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::LinkCard(content) => {
-                assert_eq!(content, "https://example.com");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_tokenize_twitter() {
-        let input = "@twitter[https://twitter.com/foo/status/11111111]";
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 1);
-        match tokens.get(0).unwrap() {
-            Token::Twitter(content) => {
-                assert_eq!(content, "https://twitter.com/foo/status/11111111");
-            }
-            _ => panic!("Unexpected token"),
-        }
-    }
-
-    #[test]
-    fn test_render_html_heading() {
-        let tokens = VecDeque::from(vec![Token::Heading {
-            level: 1,
-            content: "Title".to_string(),
-        }]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<h1>Title</h1>\n");
-    }
-
-    #[test]
-    fn test_render_html_list() {
-        let tokens = VecDeque::from(vec![Token::ListItem("Item 1".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<li>Item 1</li>\n");
-    }
-
-    #[test]
-    fn test_render_html_paragraph() {
-        let tokens = VecDeque::from(vec![Token::Paragraph("Normal text here.".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<p>Normal text here.</p>\n");
-    }
-
-    #[test]
-    fn test_render_html_bold() {
-        let tokens = VecDeque::from(vec![Token::Bold("Bold text".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<strong>Bold text</strong>\n");
-    }
-
-    #[test]
-    fn test_render_html_italic() {
-        let tokens = VecDeque::from(vec![Token::Italic("Italic text".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<em>Italic text</em>\n");
-    }
-
-    #[test]
-    fn test_render_html_link() {
-        let tokens = VecDeque::from(vec![Token::Link {
-            text: "Link text".to_string(),
-            url: "https://example.com".to_string(),
-        }]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<a href=\"https://example.com\">Link text</a>\n");
-    }
-
-    #[test]
-    fn test_render_html_code_block() {
-        let tokens = VecDeque::from(vec![Token::CodeBlock {
-            language: "rust".to_string(),
-            content: "fn main() {\n    println!(\"Hello, world!\");\n}\n".to_string(),
-        }]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<pre><code class=\"language-rust\">fn main() {\n    println!(\"Hello, world!\");\n}\n</code></pre>\n");
-    }
-
-    #[test]
-    fn test_render_html_inline_code() {
-        let tokens = VecDeque::from(vec![Token::InlineCode("let x = 42;".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<code>let x = 42;</code>\n");
-    }
-
-    #[test]
-    fn test_render_html_block_quote() {
-        let tokens = VecDeque::from(vec![Token::BlockQuote(
-            "This is a block quote.".to_string(),
-        )]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<blockquote>This is a block quote.</blockquote>\n");
-    }
-
-    #[test]
-    fn test_render_html_image() {
-        let tokens = VecDeque::from(vec![Token::Image {
-            src: "https://example.com/image.jpg".to_string(),
-            alt: "Alt text".to_string(),
-        }]);
-        let html = render_html(tokens);
-        assert_eq!(
-            html,
-            "<img src=\"https://example.com/image.jpg\" alt=\"Alt text\" />\n"
-        );
-    }
-
-    #[test]
-    fn test_render_html_link_card() {
-        let tokens = VecDeque::from(vec![Token::LinkCard("https://example.com".to_string())]);
-        let html = render_html(tokens);
-        assert_eq!(
-            html,
-            r#"<div class="link-card" data-url="https://example.com">
-<a href="https://example.com">
-<div class="link-card-content">
-<p class="link-card-title">loading...</p>
-</div>
-</a>
-</div>
-"#
-        );
-    }
-
-    #[test]
-    fn test_render_html_twitter() {
-        let tokens = VecDeque::from(vec![Token::Twitter(
-            "https://twitter.com/foo/status/11111111".to_string(),
-        )]);
-        let html = render_html(tokens);
-        assert_eq!(html, "<p>twitter card is not supported anymore...</p>\n");
-    }
-
-    #[test]
-    fn test_tokenize() {
-        let input = r#"
-# Title
-## Subtitle
-### Sub-subtitle
-* Item 1
-* Item 2
-* Item 3
-Normal text here.
-"#;
-        let tokens = tokenize(input);
-        assert_eq!(tokens.len(), 8);
-    }
+pub fn remove_frontmatter(input: &str) -> String {
+    let frontmatter_re = Regex::new(r"---\n([\s\S]*?)\n---\n\n([\s\S]*)").unwrap();
+    let captures = match frontmatter_re.captures(input) {
+        Some(captures) => captures,
+        None => return input.to_string(),
+    };
 
-    #[test]
-    fn test_render_html() {
-        let tokens = VecDeque::from(vec![
-            Token::Heading {
-                level: 1,
-                content: "Title".to_string(),
-            },
-            Token::Heading {
-                level: 2,
-                content: "Subtitle".to_string(),
-            },
-            Token::Heading {
-                level: 3,
-                content: "Sub-subtitle".to_string(),
-            },
-            Token::ListItem("Item 1".to_string()),
-            Token::ListItem("Item 2".to_string()),
-            Token::ListItem("Item 3".to_string()),
-            Token::Paragraph("Normal text here.".to_string()),
-        ]);
-        let html = render_html(tokens);
-        assert_eq!(
-            html,
-            "<h1>Title</h1>\n<h2>Subtitle</h2>\n<h3>Sub-subtitle</h3>\n<li>Item 1</li>\n<li>Item 2</li>\n<li>Item 3</li>\n<p>Normal text here.</p>\n"
-        );
-    }
+    captures.get(2).unwrap().as_str().to_string()
 }
 
 pub fn html_to_string(html: String) -> String {
@@ -619,3 +70,13 @@ pub fn html_to_string(html: String) -> String {
 
     result
 }
+
+pub mod generator;
+pub mod lexer;
+pub mod token;
+pub mod tokenizer;
+
Item 1\n"; + // let tokens = tokenize(input); + // assert_eq!(tokens.len(), 1); + // match tokens.get(0).unwrap() { + // Token::OrderedList(items) => { + // assert_eq!(items.len(), 1); + // assert_eq!(items[0], "Item 1"); + // } + // _ => panic!("Unexpected token"), + // } + // } + + #[test] + fn test_bold() { + let input = "**Bold text**"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::Bold(content) => { + assert_eq!(content, "Bold text"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_italic() { + let input = "*Italic text*"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::Italic(content) => { + assert_eq!(content, "Italic text"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_link() { + let input = "[Link text](https://example.com)"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::Link { text, url } => { + assert_eq!(text, "Link text"); + assert_eq!(url, "https://example.com"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_code_block() { + let input = "```rust\nfn main() {\n println!(\"Hello, world!\");\n}\n```"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::CodeBlock { language, content } => { + assert_eq!(language, "rust"); + assert_eq!( + content, + "fn main() {\n println!(\"Hello, world!\");\n}\n" + ); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_inline_code() { + let input = "`let x = 42;`"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::InlineCode(content) => { + assert_eq!(content, "let x = 42;"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_block_quote() { + let input = "> This is a block quote."; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::BlockQuote(content) => { + assert_eq!(content, "This is a block quote."); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_image() { + let input = "![Alt text](https://example.com/image.jpg)"; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::Image { src, alt } => { + assert_eq!(src, "https://example.com/image.jpg"); + assert_eq!(alt, "Alt text"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_tokenize_link_card() { + let input = "@og[https://example.com]"; + let tokens = tokenize(input); + println!("{:?}", tokens); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::LinkCard(content) => { + assert_eq!(content, "https://example.com"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_tokenize_twitter() { + let input = "@twitter[https://twitter.com/foo/status/11111111]"; + let tokens = tokenize(input); + println!("{:?}", tokens); + assert_eq!(tokens.len(), 1); + match tokens.get(0).unwrap() { + Token::Twitter(content) => { + assert_eq!(content, "https://twitter.com/foo/status/11111111"); + } + _ => panic!("Unexpected token"), + } + } + + #[test] + fn test_tokenize() { + let input = r#" +# Title +## Subtitle +### Sub-subtitle +* Item 1 +* Item 2 +* Item 3 +Normal text here. +"#; + let tokens = tokenize(input); + assert_eq!(tokens.len(), 7); + } +}