diff --git a/.circleci/config.yml b/.circleci/config.yml index edd45e4a44a3..7d5f14d4a51e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -155,7 +155,7 @@ jobs: echo "export CRYSTAL_SHA1=$CIRCLE_SHA1" >> build.env # Which previous version use - export PREVIOUS_CRYSTAL_BASE_URL="https://github.com/crystal-lang/crystal/releases/download/1.1.0/crystal-1.1.0-1" + export PREVIOUS_CRYSTAL_BASE_URL="https://github.com/crystal-lang/crystal/releases/download/1.1.1/crystal-1.1.1-1" echo "export PREVIOUS_CRYSTAL_RELEASE_LINUX64_TARGZ=${PREVIOUS_CRYSTAL_BASE_URL}-linux-x86_64.tar.gz" >> build.env echo "export PREVIOUS_CRYSTAL_RELEASE_LINUX32_TARGZ=${PREVIOUS_CRYSTAL_BASE_URL}-linux-i686.tar.gz" >> build.env echo "export PREVIOUS_CRYSTAL_RELEASE_DARWIN_TARGZ=${PREVIOUS_CRYSTAL_BASE_URL}-darwin-x86_64.tar.gz" >> build.env diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 0ce7355958ee..26470f99d8b9 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -5,4 +5,4 @@ contact_links: about: Questions about installing, using Crystal and any related issues. - name: "\U0001F4AC Crystal Community Chat" url: https://gitter.im/crystal-lang/crystal - about: "Get in touch with the community, ask for help and talk about Crystal. (IRC: #crystal-lang on freenode)" + about: "Get in touch with the community, ask for help and talk about Crystal. (IRC: #crystal-lang on libera.chat)" diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md new file mode 100644 index 000000000000..5d76092f9bbf --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -0,0 +1,7 @@ +We thank you for helping improve Crystal. 
In order to ease the reviewing process, we invite you to read the [guidelines](https://github.com/crystal-lang/crystal/blob/master/CONTRIBUTING.md#making-good-pull-requests) and ask you to consider the following points before submitting a PR: + +1. We prefer to discuss the underlying issue _prior_ to discussing the code. Therefore, we kindly ask you to refer to an existing issue, or an existing discussion in a public space with members of the Core Team (forum, Gitter, Discord, ...). In a few cases, we acknowledge that this might not be necessary, for instance when refactoring code or small bug fixes. In this case, the PR must include the same information an issue would have: a clear explanation of the issue, reproducible code, etc. + +2. Focus the PR on the referred issue, and refrain from adding unrelated changes/additions. We do welcome another PR if you fixed another issue. + +3. If your change is big, consider breaking it into several smaller PRs. In general, the smaller the change, the quicker we can review it. 
diff --git a/.github/workflows/win.yml b/.github/workflows/win.yml index 1b2d11627972..87a8f1d1dda0 100644 --- a/.github/workflows/win.yml +++ b/.github/workflows/win.yml @@ -5,7 +5,7 @@ on: [push, pull_request] jobs: linux-job: runs-on: ubuntu-latest - container: crystallang/crystal:1.1.0-build + container: crystallang/crystal:1.1.1-build steps: - name: Download Crystal source uses: actions/checkout@v2 diff --git a/CHANGELOG.md b/CHANGELOG.md index 17b5b85ef1c0..e38b233f42b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,23 @@ +# 1.1.1 (2021-07-26) + +## Language changes +- Revert name of top-level module to `main` ([#10993](https://github.com/crystal-lang/crystal/pull/10993), thanks @beta-ziliani) + +## Standard Library + +- Fix missing required args for `Socket::Addrinfo::Error.new` ([#10960](https://github.com/crystal-lang/crystal/pull/10960), thanks @straight-shoota) +- Fix disable unnecessary spec on win32 ([#10971](https://github.com/crystal-lang/crystal/pull/10971), thanks @straight-shoota) +- Remove incorrect type restrictions on index methods with offset ([#10972](https://github.com/crystal-lang/crystal/pull/10972), thanks @straight-shoota) +- Fix: documentation of `#step` in `Number` and `Char` ([#10966](https://github.com/crystal-lang/crystal/pull/10966), [#11006](https://github.com/crystal-lang/crystal/pull/11006), thanks @beta-ziliani and @straight-shoota) + +## Compiler + +- Fix parsing macro body with escaped backslash in literal ([#10995](https://github.com/crystal-lang/crystal/pull/10995), thanks @straight-shoota) + +## Other + +- Updating aarch64 actions to use 1.0.0 images ([#10976](https://github.com/crystal-lang/crystal/pull/10976), thanks @beta-ziliani) + # 1.1.0 (2021-07-14) ## Language changes diff --git a/Makefile b/Makefile index 8e465e846327..ac3a8b670ae3 100644 --- a/Makefile +++ b/Makefile @@ -31,13 +31,16 @@ SPEC_SOURCES := $(shell find spec -name '*.cr') override FLAGS += $(if $(release),--release )$(if $(stats),--stats 
)$(if $(progress),--progress )$(if $(threads),--threads $(threads) )$(if $(debug),-d )$(if $(static),--static )$(if $(LDFLAGS),--link-flags="$(LDFLAGS)" )$(if $(target),--cross-compile --target $(target) ) SPEC_WARNINGS_OFF := --exclude-warnings spec/std --exclude-warnings spec/compiler SPEC_FLAGS := $(if $(verbose),-v )$(if $(junit_output),--junit_output $(junit_output) ) -CRYSTAL_CONFIG_LIBRARY_PATH := $(shell bin/crystal env CRYSTAL_LIBRARY_PATH 2> /dev/null) +CRYSTAL_CONFIG_LIBRARY_PATH := '$$ORIGIN/../lib/crystal' CRYSTAL_CONFIG_BUILD_COMMIT := $(shell git rev-parse --short HEAD 2> /dev/null) +CRYSTAL_CONFIG_PATH := '$$ORIGIN/../share/crystal/src' SOURCE_DATE_EPOCH := $(shell (git show -s --format=%ct HEAD || stat -c "%Y" Makefile || stat -f "%m" Makefile) 2> /dev/null) EXPORTS := \ - CRYSTAL_CONFIG_LIBRARY_PATH="$(CRYSTAL_CONFIG_LIBRARY_PATH)" \ CRYSTAL_CONFIG_BUILD_COMMIT="$(CRYSTAL_CONFIG_BUILD_COMMIT)" \ + CRYSTAL_CONFIG_PATH=$(CRYSTAL_CONFIG_PATH) \ SOURCE_DATE_EPOCH="$(SOURCE_DATE_EPOCH)" +EXPORTS_BUILD := \ + CRYSTAL_CONFIG_LIBRARY_PATH=$(CRYSTAL_CONFIG_LIBRARY_PATH) SHELL = sh LLVM_CONFIG := $(shell src/llvm/ext/find-llvm-config) LLVM_EXT_DIR = src/llvm/ext @@ -116,7 +119,7 @@ $(O)/compiler_spec: $(DEPS) $(SOURCES) $(SPEC_SOURCES) $(O)/crystal: $(DEPS) $(SOURCES) @mkdir -p $(O) - $(EXPORTS) ./bin/crystal build $(FLAGS) -o $@ src/compiler/crystal.cr -D without_openssl -D without_zlib + $(EXPORTS) $(EXPORTS_BUILD) ./bin/crystal build $(FLAGS) -o $@ src/compiler/crystal.cr -D without_openssl -D without_zlib $(LLVM_EXT_OBJ): $(LLVM_EXT_DIR)/llvm_ext.cc $(CXX) -c $(CXXFLAGS) -o $@ $< $(shell $(LLVM_CONFIG) --cxxflags) diff --git a/README.md b/README.md index 3ed884e701f5..9aed990e3e10 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ Documentation Community --------- -Questions or suggestions? 
Ask on the [Crystal Forum](https://forum.crystal-lang.org), on our [Gitter channel](https://gitter.im/crystal-lang/crystal) or IRC channel [#crystal-lang](http://webchat.freenode.net/?channels=#crystal-lang) at chat.freenode.net, or on Stack Overflow under the [crystal-lang](http://stackoverflow.com/questions/tagged/crystal-lang) tag. There is also an archived [Google Group](https://groups.google.com/forum/?fromgroups#!forum/crystal-lang). +Questions or suggestions? Ask on the [Crystal Forum](https://forum.crystal-lang.org), on our [Gitter channel](https://gitter.im/crystal-lang/crystal) or IRC channel [#crystal-lang](https://web.libera.chat/#crystal-lang) at irc.libera.chat, or on Stack Overflow under the [crystal-lang](http://stackoverflow.com/questions/tagged/crystal-lang) tag. There is also an archived [Google Group](https://groups.google.com/forum/?fromgroups#!forum/crystal-lang). Contributing ------------ diff --git a/bin/ci b/bin/ci index d51ce139a8c2..d80043fb69a9 100755 --- a/bin/ci +++ b/bin/ci @@ -133,8 +133,8 @@ format() { prepare_build() { on_linux verify_linux_environment - on_osx curl -L https://github.com/crystal-lang/crystal/releases/download/1.1.0/crystal-1.1.0-1-darwin-x86_64.tar.gz -o ~/crystal.tar.gz - on_osx 'pushd ~;gunzip -c ~/crystal.tar.gz | tar xopf -;mv crystal-1.1.0-1 crystal;popd' + on_osx curl -L https://github.com/crystal-lang/crystal/releases/download/1.1.1/crystal-1.1.1-1-darwin-x86_64.tar.gz -o ~/crystal.tar.gz + on_osx 'pushd ~;gunzip -c ~/crystal.tar.gz | tar xopf -;mv crystal-1.1.1-1 crystal;popd' # These commands may take a few minutes to run due to the large size of the repositories. 
# This restriction has been made on GitHub's request because updating shallow @@ -187,7 +187,7 @@ with_build_env() { on_linux verify_linux_environment - export DOCKER_TEST_PREFIX="${DOCKER_TEST_PREFIX:=crystallang/crystal:1.1.0}" + export DOCKER_TEST_PREFIX="${DOCKER_TEST_PREFIX:=crystallang/crystal:1.1.1}" case $ARCH in x86_64) diff --git a/bin/crystal b/bin/crystal index ec70846e497d..db27e2a050a8 100755 --- a/bin/crystal +++ b/bin/crystal @@ -147,11 +147,13 @@ export CRYSTAL_HAS_WRAPPER=true export CRYSTAL="${CRYSTAL:-"crystal"}" -if [ -z "$CRYSTAL_CONFIG_LIBRARY_PATH" ]; then - export CRYSTAL_CONFIG_LIBRARY_PATH="$( - export PATH="$(remove_path_item "$(remove_path_item "$PATH" "$SCRIPT_ROOT")" "bin")" - crystal env CRYSTAL_LIBRARY_PATH || echo "" - )" +if [ -z "$CRYSTAL_CONFIG_LIBRARY_PATH" ] || [ -z "$CRYSTAL_LIBRARY_PATH" ]; then + CRYSTAL_INSTALLED_LIBRARY_PATH="$( + export PATH="$(remove_path_item "$(remove_path_item "$PATH" "$SCRIPT_ROOT")" "bin")" + crystal env CRYSTAL_LIBRARY_PATH || echo "" + )" + export CRYSTAL_LIBRARY_PATH=${CRYSTAL_LIBRARY_PATH:-$CRYSTAL_INSTALLED_LIBRARY_PATH} + export CRYSTAL_CONFIG_LIBRARY_PATH=${CRYSTAL_CONFIG_LIBRARY_PATH:-$CRYSTAL_INSTALLED_LIBRARY_PATH} fi if [ -x "$CRYSTAL_DIR/crystal" ]; then diff --git a/lib/markd/.github/workflows/linux-ci.yml b/lib/markd/.github/workflows/linux-ci.yml new file mode 100644 index 000000000000..3d2c934b17ec --- /dev/null +++ b/lib/markd/.github/workflows/linux-ci.yml @@ -0,0 +1,34 @@ +name: Linux CI +on: + push: + paths-ignore: + - "benchmarks/**" + branches: + - "master" + pull_request: + branches: "*" + +jobs: + specs: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + crystal: [ '1.0.0', 'latest', 'nightly' ] + name: Crystal ${{ matrix.crystal }} tests + steps: + - uses: actions/checkout@master + - uses: oprypin/install-crystal@v1 + with: + crystal: ${{ matrix.crystal }} + - name: Install dependencies + run: shards install + - name: Run tests + run: crystal spec 
--error-on-warnings --error-trace + - name: Run code format check + run: | + if ! crystal tool format --check; then + crystal tool format + git diff + exit 1 + fi diff --git a/lib/markd/.github/workflows/release-version.yml b/lib/markd/.github/workflows/release-version.yml new file mode 100644 index 000000000000..c27f8da4a869 --- /dev/null +++ b/lib/markd/.github/workflows/release-version.yml @@ -0,0 +1,22 @@ +name: Deploy new release +on: + push: + tags: + - "v*" + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@master + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: ${{ github.ref }} + release_name: Release ${{ github.ref }} + draft: false + prerelease: false + diff --git a/lib/markd/.gitignore b/lib/markd/.gitignore new file mode 100644 index 000000000000..a4671c01443b --- /dev/null +++ b/lib/markd/.gitignore @@ -0,0 +1,12 @@ +/doc/ +/lib/ +/bin/ +/.shards/ +/src/main.cr + +# Libraries don't need dependency lock +# Dependencies will be locked in application that uses them +/shard.lock + +# vscode +/.history/ diff --git a/lib/markd/.vscode/launch.json b/lib/markd/.vscode/launch.json new file mode 100644 index 000000000000..3ab15c3de50e --- /dev/null +++ b/lib/markd/.vscode/launch.json @@ -0,0 +1,13 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Launch", + "program": "${workspaceRoot}/bin/main", + "args": [], + "cwd": "${workspaceRoot}" + } + ] +} diff --git a/lib/markd/CHANGELOG.md b/lib/markd/CHANGELOG.md new file mode 100644 index 000000000000..dfc7a68abd11 --- /dev/null +++ b/lib/markd/CHANGELOG.md @@ -0,0 +1,61 @@ +# Change Log + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] + +### TODO + +- GFM support + +## [0.4.0] (2021-03-23) + +- Compatibility with Crystal 1.0. #[34](https://github.com/icyleaf/markd/pull/34) thanks @[bcardiff](https://github.com/bcardiff). + +## [0.3.0] (2021-03-02) + +No changelog. + +## [0.2.1] (2020-08-24) + +### Added + +- Add Options#base_url to allow resolving relative links. #[26](https://github.com/icyleaf/markd/pull/26), #[28](https://github.com/icyleaf/markd/pull/28) thanks @[straight-shoota](https://github.com/straight-shoota). + +### Fixed + +- [high severity] escape unsafe html entry inline of code block. #[32](https://github.com/icyleaf/markd/pull/32). +- Fixed some typos in README. #[29](https://github.com/icyleaf/markd/pull/29) thanks @[Calamari](https://github.com/Calamari). + +## [0.2.0] (2019-10-08) + +### Changed + +- Speed optimizations. Many thanks @[asterite](https://github.com/asterite). #[19](https://github.com/icyleaf/markd/pull/19) + +### Fixed + +- Compatibility with Crystal 0.31. #[22](https://github.com/icyleaf/markd/pull/22). + +## [0.1.2] (2019-08-26) + +- Use Crystal v0.31.0 as default compiler. + +## [0.1.1] (2017-12-26) + +- Minor refactoring and improving speed. thanks @[straight-shoota](https://github.com/straight-shoota). +- Use Crystal v0.24.1 as default compiler. 
+ +## 0.1.0 (2017-09-22) + +- [initial implementation](https://github.com/icyleaf/markd/milestone/1?closed=1) + +[Unreleased]: https://github.com/icyleaf/markd/compare/v0.4.0...HEAD +[0.3.0]: https://github.com/icyleaf/markd/compare/v0.2.1...v0.3.0 +[0.2.1]: https://github.com/icyleaf/markd/compare/v0.2.0...v0.2.1 +[0.2.0]: https://github.com/icyleaf/markd/compare/v0.1.2...v0.2.0 +[0.1.2]: https://github.com/icyleaf/markd/compare/v0.1.1...v0.1.2 +[0.1.1]: https://github.com/icyleaf/markd/compare/v0.1.0...v0.1.1 diff --git a/lib/markd/LICENSE b/lib/markd/LICENSE new file mode 100644 index 000000000000..317898da1289 --- /dev/null +++ b/lib/markd/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2017-present icyleaf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/lib/markd/README.md b/lib/markd/README.md new file mode 100644 index 000000000000..ac31e460c625 --- /dev/null +++ b/lib/markd/README.md @@ -0,0 +1,111 @@ +# markd + +[![Language](https://img.shields.io/badge/language-crystal-776791.svg)](https://github.com/crystal-lang/crystal) +[![Tag](https://img.shields.io/github/tag/icyleaf/markd.svg)](https://github.com/icyleaf/markd/blob/master/CHANGELOG.md) +[![Build Status](https://img.shields.io/circleci/project/github/icyleaf/markd/master.svg?style=flat)](https://circleci.com/gh/icyleaf/markd) + +Yet another markdown parser built for speed, written in [Crystal](https://crystal-lang.org), Compliant to [CommonMark](http://spec.commonmark.org) specification (`v0.27`). Copy from [commonmark.js](https://github.com/jgm/commonmark.js). + +## Installation + +Add this to your application's `shard.yml`: + +```yaml +dependencies: + markd: + github: icyleaf/markd +``` + +## Quick start + +```crystal +require "markd" + +markdown = <<-MD +# Hello Markd + +> Yet another markdown parser built for speed, written in Crystal, Compliant to CommonMark specification. +MD + +html = Markd.to_html(markdown) +``` + +Also here are options to configure the parse and render. + +```crystal +options = Markd::Options.new(smart: true, safe: true) +Markd.to_html(markdown, options) +``` + +## Options + +| Name | Type | Default value | Description | +| ----------- | ------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| time | `Bool` | false | render parse cost time during read source, parse blocks, parse inline. | +| smart | `Bool` | false | if **true**, straight quotes will be made curly,
`--` will be changed to an en dash,
`---` will be changed to an em dash, and
`...` will be changed to ellipses. | +| source_pos | `Bool` | false | if **true**, source position information for block-level elements
will be rendered in the data-sourcepos attribute (for HTML) | +| safe | `Bool` | false | if **true**, raw HTML will not be passed through to HTML output (it will be replaced by comments) | +| prettyprint | `Bool` | false | if **true**, code tags generated by code blocks will have a `prettyprint` class added to them, to be used by [Google code-prettify](https://github.com/google/code-prettify). | +| gfm | `Bool` | false | **Not supported for now** | +| toc | `Bool` | false | **Not supported for now** | +| base_url | `URI?` | nil | if not **nil**, relative URLs of links are resolved against this `URI`. It acts like HTML's `<base href="base_url">` in the context of a Markdown document. | + +## Advanced + +If you want to use a custom renderer, you can! + +```crystal + +class CustomRenderer < Markd::Renderer + + def strong(node, entering) + end + + # more methods following in render. +end + +options = Markd::Options.new(time: true) +document = Markd::Parser.parse(markdown, options) +renderer = CustomRenderer.new(options) + +html = renderer.render(document) +``` + +## Performance + +First of all, Markd is slower than [Crystal Built-in Markdown](https://crystal-lang.org/api/0.23.0/Markdown.html), which is a lite version only used to generate Crystal documents ([#4496](https://github.com/crystal-lang/crystal/pull/4496), [#4613](https://github.com/crystal-lang/crystal/issues/4613)). + +Here is the result of parsing [a sample markdown file](benchmarks/source.md) on a MacBook Pro Retina 2015 (2.2 GHz): + +``` +Crystal Markdown 3.28k (305.29µs) (± 0.92%) fastest + Markd 305.36 ( 3.27ms) (± 5.52%) 10.73× slower +``` + +I am currently working on comparisons with other popular CommonMark parsers; the code is stored in [benchmarks](/benchmarks). + +## Donate + +Markd is an open source, collaboratively funded project. If you run a business and are using Markd in a revenue-generating product, +it would make business sense to sponsor Markd development. 
Individual users are also welcome to make a one time donation +if Markd has helped you in your work or personal projects. + +You can donate via [Paypal](https://www.paypal.me/icyleaf/5). + +## How to Contribute + +Your contributions are always welcome! Please submit a pull request or create an issue to add a new question, bug or feature to the list. + +All [Contributors](https://github.com/icyleaf/markd/graphs/contributors) are on the wall. + +## You may also like + +- [halite](https://github.com/icyleaf/halite) - HTTP Requests Client with a chainable REST API, built-in sessions and middlewares. +- [totem](https://github.com/icyleaf/totem) - Load and parse a configuration file or string in JSON, YAML, dotenv formats. +- [poncho](https://github.com/icyleaf/poncho) - A .env parser/loader improved for performance. +- [popcorn](https://github.com/icyleaf/popcorn) - Easy and Safe casting from one type to another. +- [fast-crystal](https://github.com/icyleaf/fast-crystal) - 💨 Writing Fast Crystal 😍 -- Collect Common Crystal idioms. + +## License + +[MIT License](https://github.com/icyleaf/markd/blob/master/LICENSE) © icyleaf diff --git a/lib/markd/shard.yml b/lib/markd/shard.yml new file mode 100644 index 000000000000..4f1aff60233f --- /dev/null +++ b/lib/markd/shard.yml @@ -0,0 +1,9 @@ +name: markd +version: 0.4.0 + +authors: + - icyleaf + +crystal: 1.0.0 + +license: MIT diff --git a/lib/markd/spec/api_spec.cr b/lib/markd/spec/api_spec.cr new file mode 100644 index 000000000000..9212a17c15d4 --- /dev/null +++ b/lib/markd/spec/api_spec.cr @@ -0,0 +1,21 @@ +require "spec" +require "../src/markd" + +describe Markd::Options do + describe "#base_url" do + it "it disabled by default" do + options = Markd::Options.new + Markd.to_html("[foo](bar)", options).should eq %(

foo

\n) + Markd.to_html("![](bar)", options).should eq %(

\n) + end + + it "absolutizes relative urls" do + options = Markd::Options.new + options.base_url = URI.parse("http://example.com") + Markd.to_html("[foo](bar)", options).should eq %(

foo

\n) + Markd.to_html("[foo](https://example.com/baz)", options).should eq %(

foo

\n) + Markd.to_html("![](bar)", options).should eq %(

\n) + Markd.to_html("![](https://example.com/baz)", options).should eq %(

\n) + end + end +end diff --git a/lib/markd/spec/fixtures/regression.txt b/lib/markd/spec/fixtures/regression.txt new file mode 100644 index 000000000000..dd1496648009 --- /dev/null +++ b/lib/markd/spec/fixtures/regression.txt @@ -0,0 +1,81 @@ +# Regression tests + +Eating a character after a partially consumed tab. + +```````````````````````````````` example +* foo +→bar +. + +```````````````````````````````` + +Type 7 HTML block followed by whitespace (#98). + +```````````````````````````````` example + +x +. + +x +```````````````````````````````` + +h2..h6 raw HTML blocks (jgm/CommonMark#430). + +```````````````````````````````` example +

lorem

+ +

lorem

+ +

lorem

+ +

lorem

+ +
lorem
+ +
lorem
+. +

lorem

+

lorem

+

lorem

+

lorem

+
lorem
+
lorem
+```````````````````````````````` + +Issue #109 - tabs after setext header line + + +```````````````````````````````` example +hi +--→ +. +

hi

+```````````````````````````````` + +Issue #108 - Chinese punctuation not recognized + +```````````````````````````````` example +**。**话 +. +

**。**话

+```````````````````````````````` + +Issue jgm/cmark#177 - incorrect emphasis parsing + +```````````````````````````````` example +a***b* c* +. +

a*b c

+```````````````````````````````` + +Issue jgm/CommonMark#468 - backslash at end of link definition + + +```````````````````````````````` example +[\]: test +. +

[]: test

+```````````````````````````````` diff --git a/lib/markd/spec/fixtures/smart_punct.txt b/lib/markd/spec/fixtures/smart_punct.txt new file mode 100644 index 000000000000..3522c94c28c0 --- /dev/null +++ b/lib/markd/spec/fixtures/smart_punct.txt @@ -0,0 +1,168 @@ +## Smart punctuation + +Open quotes are matched with closed quotes. +The same method is used for matching openers and closers +as is used in emphasis parsing: + +```````````````````````````````` example +"Hello," said the spider. +"'Shelob' is my name." +. +

“Hello,” said the spider. +“‘Shelob’ is my name.”

+```````````````````````````````` + +```````````````````````````````` example +'A', 'B', and 'C' are letters. +. +

‘A’, ‘B’, and ‘C’ are letters.

+```````````````````````````````` + +```````````````````````````````` example +'Oak,' 'elm,' and 'beech' are names of trees. +So is 'pine.' +. +

‘Oak,’ ‘elm,’ and ‘beech’ are names of trees. +So is ‘pine.’

+```````````````````````````````` + +```````````````````````````````` example +'He said, "I want to go."' +. +

‘He said, “I want to go.”’

+```````````````````````````````` + +A single quote that isn't an open quote matched +with a close quote will be treated as an +apostrophe: + +```````````````````````````````` example +Were you alive in the 70's? +. +

Were you alive in the 70’s?

+```````````````````````````````` + +```````````````````````````````` example +Here is some quoted '`code`' and a "[quoted link](url)". +. +

Here is some quoted ‘code’ and a “quoted link”.

+```````````````````````````````` + +Here the first `'` is treated as an apostrophe, not +an open quote, because the final single quote is matched +by the single quote before `jolly`: + +```````````````````````````````` example +'tis the season to be 'jolly' +. +

’tis the season to be ‘jolly’

+```````````````````````````````` + +Multiple apostrophes should not be marked as open/closing quotes. + +```````````````````````````````` example +'We'll use Jane's boat and John's truck,' Jenna said. +. +

‘We’ll use Jane’s boat and John’s truck,’ Jenna said.

+```````````````````````````````` + +An unmatched double quote will be interpreted as a +left double quote, to facilitate this style: + +```````````````````````````````` example +"A paragraph with no closing quote. + +"Second paragraph by same speaker, in fiction." +. +

“A paragraph with no closing quote.

+

“Second paragraph by same speaker, in fiction.”

+```````````````````````````````` + +Quotes that are escaped come out as literal straight +quotes: + +```````````````````````````````` example +\"This is not smart.\" +This isn\'t either. +5\'8\" +. +

"This is not smart." +This isn't either. +5'8"

+```````````````````````````````` + +Two hyphens form an en-dash, three an em-dash. + +```````````````````````````````` example +Some dashes: em---em +en--en +em --- em +en -- en +2--3 +. +

Some dashes: em—em +en–en +em — em +en – en +2–3

+```````````````````````````````` + +A sequence of more than three hyphens is +parsed as a sequence of em and/or en dashes, +with no hyphens. If possible, a homogeneous +sequence of dashes is used (so, 10 hyphens += 5 en dashes, and 9 hyphens = 3 em dashes). +When a heterogeneous sequence must be used, +the em dashes come first, followed by the en +dashes, and as few en dashes as possible are +used (so, 7 hyphens = 2 em dashes an 1 en +dash). + +```````````````````````````````` example +one- +two-- +three--- +four---- +five----- +six------ +seven------- +eight-------- +nine--------- +thirteen-------------. +. +

one- +two– +three— +four–– +five—– +six—— +seven—–– +eight–––– +nine——— +thirteen———––.

+```````````````````````````````` + +Hyphens can be escaped: + +```````````````````````````````` example +Escaped hyphens: \-- \-\-\-. +. +

Escaped hyphens: -- ---.

+```````````````````````````````` + +Three periods form an ellipsis: + +```````````````````````````````` example +Ellipses...and...and.... +. +

Ellipses…and…and….

+```````````````````````````````` + +Periods can be escaped if ellipsis-formation +is not wanted: + +```````````````````````````````` example +No ellipses\.\.\. +. +

No ellipses...

+```````````````````````````````` diff --git a/lib/markd/spec/fixtures/spec.txt b/lib/markd/spec/fixtures/spec.txt new file mode 100644 index 000000000000..857e92c32d3b --- /dev/null +++ b/lib/markd/spec/fixtures/spec.txt @@ -0,0 +1,9353 @@ +--- +title: CommonMark Spec +author: John MacFarlane +version: 0.27 +date: '2016-11-18' +license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)' +... + +# Introduction + +## What is Markdown? + +Markdown is a plain text format for writing structured documents, +based on conventions used for indicating formatting in email and +usenet posts. It was developed in 2004 by John Gruber, who wrote +the first Markdown-to-HTML converter in Perl, and it soon became +ubiquitous. In the next decade, dozens of implementations were +developed in many languages. Some extended the original +Markdown syntax with conventions for footnotes, tables, and +other document elements. Some allowed Markdown documents to be +rendered in formats other than HTML. Websites like Reddit, +StackOverflow, and GitHub had millions of people using Markdown. +And Markdown started to be used beyond the web, to author books, +articles, slide shows, letters, and lecture notes. + +What distinguishes Markdown from many other lightweight markup +syntaxes, which are often easier to write, is its readability. +As Gruber writes: + +> The overriding design goal for Markdown's formatting syntax is +> to make it as readable as possible. The idea is that a +> Markdown-formatted document should be publishable as-is, as +> plain text, without looking like it's been marked up with tags +> or formatting instructions. +> () + +The point can be illustrated by comparing a sample of +[AsciiDoc](http://www.methods.co.nz/asciidoc/) with +an equivalent sample of Markdown. Here is a sample of +AsciiDoc from the AsciiDoc manual: + +``` +1. List item one. ++ +List item one continued with a second paragraph followed by an +Indented block. ++ +................. 
+$ ls *.sh +$ mv *.sh ~/tmp +................. ++ +List item continued with a third paragraph. + +2. List item two continued with an open block. ++ +-- +This paragraph is part of the preceding list item. + +a. This list is nested and does not require explicit item +continuation. ++ +This paragraph is part of the preceding list item. + +b. List item b. + +This paragraph belongs to item two of the outer list. +-- +``` + +And here is the equivalent in Markdown: +``` +1. List item one. + + List item one continued with a second paragraph followed by an + Indented block. + + $ ls *.sh + $ mv *.sh ~/tmp + + List item continued with a third paragraph. + +2. List item two continued with an open block. + + This paragraph is part of the preceding list item. + + 1. This list is nested and does not require explicit item continuation. + + This paragraph is part of the preceding list item. + + 2. List item b. + + This paragraph belongs to item two of the outer list. +``` + +The AsciiDoc version is, arguably, easier to write. You don't need +to worry about indentation. But the Markdown version is much easier +to read. The nesting of list items is apparent to the eye in the +source, not just in the processed document. + +## Why is a spec needed? + +John Gruber's [canonical description of Markdown's +syntax](http://daringfireball.net/projects/markdown/syntax) +does not specify the syntax unambiguously. Here are some examples of +questions it does not answer: + +1. How much indentation is needed for a sublist? The spec says that + continuation paragraphs need to be indented four spaces, but is + not fully explicit about sublists. It is natural to think that + they, too, must be indented four spaces, but `Markdown.pl` does + not require that. This is hardly a "corner case," and divergences + between implementations on this issue often lead to surprises for + users in real documents. (See [this comment by John + Gruber](http://article.gmane.org/gmane.text.markdown.general/1997).) + +2. 
Is a blank line needed before a block quote or heading? + Most implementations do not require the blank line. However, + this can lead to unexpected results in hard-wrapped text, and + also to ambiguities in parsing (note that some implementations + put the heading inside the blockquote, while others do not). + (John Gruber has also spoken [in favor of requiring the blank + lines](http://article.gmane.org/gmane.text.markdown.general/2146).) + +3. Is a blank line needed before an indented code block? + (`Markdown.pl` requires it, but this is not mentioned in the + documentation, and some implementations do not require it.) + + ``` markdown + paragraph + code? + ``` + +4. What is the exact rule for determining when list items get + wrapped in `

` tags? Can a list be partially "loose" and partially + "tight"? What should we do with a list like this? + + ``` markdown + 1. one + + 2. two + 3. three + ``` + + Or this? + + ``` markdown + 1. one + - a + + - b + 2. two + ``` + + (There are some relevant comments by John Gruber + [here](http://article.gmane.org/gmane.text.markdown.general/2554).) + +5. Can list markers be indented? Can ordered list markers be right-aligned? + + ``` markdown + 8. item 1 + 9. item 2 + 10. item 2a + ``` + +6. Is this one list with a thematic break in its second item, + or two lists separated by a thematic break? + + ``` markdown + * a + * * * * * + * b + ``` + +7. When list markers change from numbers to bullets, do we have + two lists or one? (The Markdown syntax description suggests two, + but the perl scripts and many other implementations produce one.) + + ``` markdown + 1. fee + 2. fie + - foe + - fum + ``` + +8. What are the precedence rules for the markers of inline structure? + For example, is the following a valid link, or does the code span + take precedence ? + + ``` markdown + [a backtick (`)](/url) and [another backtick (`)](/url). + ``` + +9. What are the precedence rules for markers of emphasis and strong + emphasis? For example, how should the following be parsed? + + ``` markdown + *foo *bar* baz* + ``` + +10. What are the precedence rules between block-level and inline-level + structure? For example, how should the following be parsed? + + ``` markdown + - `a long code span can contain a hyphen like this + - and it can screw things up` + ``` + +11. Can list items include section headings? (`Markdown.pl` does not + allow this, but does allow blockquotes to include headings.) + + ``` markdown + - # Heading + ``` + +12. Can list items be empty? + + ``` markdown + * a + * + * b + ``` + +13. Can link references be defined inside block quotes or list items? + + ``` markdown + > Blockquote [foo]. + > + > [foo]: /url + ``` + +14. 
If there are multiple definitions for the same reference, which takes + precedence? + + ``` markdown + [foo]: /url1 + [foo]: /url2 + + [foo][] + ``` + +In the absence of a spec, early implementers consulted `Markdown.pl` +to resolve these ambiguities. But `Markdown.pl` was quite buggy, and +gave manifestly bad results in many cases, so it was not a +satisfactory replacement for a spec. + +Because there is no unambiguous spec, implementations have diverged +considerably. As a result, users are often surprised to find that +a document that renders one way on one system (say, a GitHub wiki) +renders differently on another (say, converting to DocBook using +pandoc). To make matters worse, because nothing in Markdown counts +as a "syntax error," the divergence often isn't discovered right away. + +## About this document + +This document attempts to specify Markdown syntax unambiguously. +It contains many examples with side-by-side Markdown and +HTML. These are intended to double as conformance tests. An +accompanying script `spec_tests.py` can be used to run the tests +against any Markdown program: + + python test/spec_tests.py --spec spec.txt --program PROGRAM + +Since this document describes how Markdown is to be parsed into +an abstract syntax tree, it would have made sense to use an abstract +representation of the syntax tree instead of HTML. But HTML is capable +of representing the structural distinctions we need to make, and the +choice of HTML for the tests makes it possible to run the tests against +an implementation without writing an abstract syntax tree renderer. + +This document is generated from a text file, `spec.txt`, written +in Markdown with a small extension for the side-by-side tests. +The script `tools/makespec.py` can be used to convert `spec.txt` into +HTML or CommonMark (which can then be converted into other formats). + +In the examples, the `→` character is used to represent tabs. 
+ +# Preliminaries + +## Characters and lines + +Any sequence of [characters] is a valid CommonMark +document. + +A [character](@) is a Unicode code point. Although some +code points (for example, combining accents) do not correspond to +characters in an intuitive sense, all code points count as characters +for purposes of this spec. + +This spec does not specify an encoding; it thinks of lines as composed +of [characters] rather than bytes. A conforming parser may be limited +to a certain encoding. + +A [line](@) is a sequence of zero or more [characters] +other than newline (`U+000A`) or carriage return (`U+000D`), +followed by a [line ending] or by the end of file. + +A [line ending](@) is a newline (`U+000A`), a carriage return +(`U+000D`) not followed by a newline, or a carriage return and a +following newline. + +A line containing no characters, or a line containing only spaces +(`U+0020`) or tabs (`U+0009`), is called a [blank line](@). + +The following definitions of character classes will be used in this spec: + +A [whitespace character](@) is a space +(`U+0020`), tab (`U+0009`), newline (`U+000A`), line tabulation (`U+000B`), +form feed (`U+000C`), or carriage return (`U+000D`). + +[Whitespace](@) is a sequence of one or more [whitespace +characters]. + +A [Unicode whitespace character](@) is +any code point in the Unicode `Zs` class, or a tab (`U+0009`), +carriage return (`U+000D`), newline (`U+000A`), or form feed +(`U+000C`). + +[Unicode whitespace](@) is a sequence of one +or more [Unicode whitespace characters]. + +A [space](@) is `U+0020`. + +A [non-whitespace character](@) is any character +that is not a [whitespace character]. + +An [ASCII punctuation character](@) +is `!`, `"`, `#`, `$`, `%`, `&`, `'`, `(`, `)`, +`*`, `+`, `,`, `-`, `.`, `/`, `:`, `;`, `<`, `=`, `>`, `?`, `@`, +`[`, `\`, `]`, `^`, `_`, `` ` ``, `{`, `|`, `}`, or `~`. 
+ +A [punctuation character](@) is an [ASCII +punctuation character] or anything in +the Unicode classes `Pc`, `Pd`, `Pe`, `Pf`, `Pi`, `Po`, or `Ps`. + +## Tabs + +Tabs in lines are not expanded to [spaces]. However, +in contexts where whitespace helps to define block structure, +tabs behave as if they were replaced by spaces with a tab stop +of 4 characters. + +Thus, for example, a tab can be used instead of four spaces +in an indented code block. (Note, however, that internal +tabs are passed through as literal tabs, not expanded to +spaces.) + +```````````````````````````````` example +→foo→baz→→bim +. +

foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + →foo→baz→→bim +. +
foo→baz→→bim
+
+```````````````````````````````` + +```````````````````````````````` example + a→a + ὐ→a +. +
a→a
+ὐ→a
+
+```````````````````````````````` + +In the following example, a continuation paragraph of a list +item is indented with a tab; this has exactly the same effect +as indentation with four spaces would: + +```````````````````````````````` example + - foo + +→bar +. + +```````````````````````````````` + +```````````````````````````````` example +- foo + +→→bar +. + +```````````````````````````````` + +Normally the `>` that begins a block quote may be followed +optionally by a space, which is not considered part of the +content. In the following case `>` is followed by a tab, +which is treated as if it were expanded into spaces. +Since one of these spaces is considered part of the +delimiter, `foo` is considered to be indented six spaces +inside the block quote context, so we get an indented +code block starting with two spaces. + +```````````````````````````````` example +>→→foo +. +
+
  foo
+
+
+```````````````````````````````` + +```````````````````````````````` example +-→→foo +. + +```````````````````````````````` + + +```````````````````````````````` example + foo +→bar +. +
foo
+bar
+
+```````````````````````````````` + +```````````````````````````````` example + - foo + - bar +→ - baz +. + +```````````````````````````````` + +```````````````````````````````` example +#→Foo +. +

Foo

+```````````````````````````````` + +```````````````````````````````` example +*→*→*→ +. +
+```````````````````````````````` + + +## Insecure characters + +For security reasons, the Unicode character `U+0000` must be replaced +with the REPLACEMENT CHARACTER (`U+FFFD`). + +# Blocks and inlines + +We can think of a document as a sequence of +[blocks](@)---structural elements like paragraphs, block +quotations, lists, headings, rules, and code blocks. Some blocks (like +block quotes and list items) contain other blocks; others (like +headings and paragraphs) contain [inline](@) content---text, +links, emphasized text, images, code, and so on. + +## Precedence + +Indicators of block structure always take precedence over indicators +of inline structure. So, for example, the following is a list with +two items, not a list with one item containing a code span: + +```````````````````````````````` example +- `one +- two` +. + +```````````````````````````````` + + +This means that parsing can proceed in two steps: first, the block +structure of the document can be discerned; second, text lines inside +paragraphs, headings, and other block constructs can be parsed for inline +structure. The second step requires information about link reference +definitions that will be available only at the end of the first +step. Note that the first step requires processing lines in sequence, +but the second can be parallelized, since the inline parsing of +one block element does not affect the inline parsing of any other. + +## Container blocks and leaf blocks + +We can divide blocks into two types: +[container block](@)s, +which can contain other blocks, and [leaf block](@)s, +which cannot. + +# Leaf blocks + +This section describes the different kinds of leaf block that make up a +Markdown document. + +## Thematic breaks + +A line consisting of 0-3 spaces of indentation, followed by a sequence +of three or more matching `-`, `_`, or `*` characters, each followed +optionally by any number of spaces, forms a +[thematic break](@). 
+ +```````````````````````````````` example +*** +--- +___ +. +
+
+
+```````````````````````````````` + + +Wrong characters: + +```````````````````````````````` example ++++ +. +

+++

+```````````````````````````````` + + +```````````````````````````````` example +=== +. +

===

+```````````````````````````````` + + +Not enough characters: + +```````````````````````````````` example +-- +** +__ +. +

-- +** +__

+```````````````````````````````` + + +One to three spaces indent are allowed: + +```````````````````````````````` example + *** + *** + *** +. +
+
+
+```````````````````````````````` + + +Four spaces is too many: + +```````````````````````````````` example + *** +. +
***
+
+```````````````````````````````` + + +```````````````````````````````` example +Foo + *** +. +

Foo +***

+```````````````````````````````` + + +More than three characters may be used: + +```````````````````````````````` example +_____________________________________ +. +
+```````````````````````````````` + + +Spaces are allowed between the characters: + +```````````````````````````````` example + - - - +. +
+```````````````````````````````` + + +```````````````````````````````` example + ** * ** * ** * ** +. +
+```````````````````````````````` + + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +Spaces are allowed at the end: + +```````````````````````````````` example +- - - - +. +
+```````````````````````````````` + + +However, no other characters may occur in the line: + +```````````````````````````````` example +_ _ _ _ a + +a------ + +---a--- +. +

_ _ _ _ a

+

a------

+

---a---

+```````````````````````````````` + + +It is required that all of the [non-whitespace characters] be the same. +So, this is not a thematic break: + +```````````````````````````````` example + *-* +. +

-

+```````````````````````````````` + + +Thematic breaks do not need blank lines before or after: + +```````````````````````````````` example +- foo +*** +- bar +. + +
+ +```````````````````````````````` + + +Thematic breaks can interrupt a paragraph: + +```````````````````````````````` example +Foo +*** +bar +. +

Foo

+
+

bar

+```````````````````````````````` + + +If a line of dashes that meets the above conditions for being a +thematic break could also be interpreted as the underline of a [setext +heading], the interpretation as a +[setext heading] takes precedence. Thus, for example, +this is a setext heading, not a paragraph followed by a thematic break: + +```````````````````````````````` example +Foo +--- +bar +. +

Foo

+

bar

+```````````````````````````````` + + +When both a thematic break and a list item are possible +interpretations of a line, the thematic break takes precedence: + +```````````````````````````````` example +* Foo +* * * +* Bar +. + +
+ +```````````````````````````````` + + +If you want a thematic break in a list item, use a different bullet: + +```````````````````````````````` example +- Foo +- * * * +. + +```````````````````````````````` + + +## ATX headings + +An [ATX heading](@) +consists of a string of characters, parsed as inline content, between an +opening sequence of 1--6 unescaped `#` characters and an optional +closing sequence of any number of unescaped `#` characters. +The opening sequence of `#` characters must be followed by a +[space] or by the end of line. The optional closing sequence of `#`s must be +preceded by a [space] and may be followed by spaces only. The opening +`#` character may be indented 0-3 spaces. The raw contents of the +heading are stripped of leading and trailing spaces before being parsed +as inline content. The heading level is equal to the number of `#` +characters in the opening sequence. + +Simple headings: + +```````````````````````````````` example +# foo +## foo +### foo +#### foo +##### foo +###### foo +. +

foo

+

foo

+

foo

+

foo

+
foo
+
foo
+```````````````````````````````` + + +More than six `#` characters is not a heading: + +```````````````````````````````` example +####### foo +. +

####### foo

+```````````````````````````````` + + +At least one space is required between the `#` characters and the +heading's contents, unless the heading is empty. Note that many +implementations currently do not require the space. However, the +space was required by the +[original ATX implementation](http://www.aaronsw.com/2002/atx/atx.py), +and it helps prevent things like the following from being parsed as +headings: + +```````````````````````````````` example +#5 bolt + +#hashtag +. +

#5 bolt

+

#hashtag

+```````````````````````````````` + + +This is not a heading, because the first `#` is escaped: + +```````````````````````````````` example +\## foo +. +

## foo

+```````````````````````````````` + + +Contents are parsed as inlines: + +```````````````````````````````` example +# foo *bar* \*baz\* +. +

foo bar *baz*

+```````````````````````````````` + + +Leading and trailing blanks are ignored in parsing inline content: + +```````````````````````````````` example +# foo +. +

foo

+```````````````````````````````` + + +One to three spaces indentation are allowed: + +```````````````````````````````` example + ### foo + ## foo + # foo +. +

foo

+

foo

+

foo

+```````````````````````````````` + + +Four spaces are too much: + +```````````````````````````````` example + # foo +. +
# foo
+
+```````````````````````````````` + + +```````````````````````````````` example +foo + # bar +. +

foo +# bar

+```````````````````````````````` + + +A closing sequence of `#` characters is optional: + +```````````````````````````````` example +## foo ## + ### bar ### +. +

foo

+

bar

+```````````````````````````````` + + +It need not be the same length as the opening sequence: + +```````````````````````````````` example +# foo ################################## +##### foo ## +. +

foo

+
foo
+```````````````````````````````` + + +Spaces are allowed after the closing sequence: + +```````````````````````````````` example +### foo ### +. +

foo

+```````````````````````````````` + + +A sequence of `#` characters with anything but [spaces] following it +is not a closing sequence, but counts as part of the contents of the +heading: + +```````````````````````````````` example +### foo ### b +. +

foo ### b

+```````````````````````````````` + + +The closing sequence must be preceded by a space: + +```````````````````````````````` example +# foo# +. +

foo#

+```````````````````````````````` + + +Backslash-escaped `#` characters do not count as part +of the closing sequence: + +```````````````````````````````` example +### foo \### +## foo #\## +# foo \# +. +

foo ###

+

foo ###

+

foo #

+```````````````````````````````` + + +ATX headings need not be separated from surrounding content by blank +lines, and they can interrupt paragraphs: + +```````````````````````````````` example +**** +## foo +**** +. +
+

foo

+
+```````````````````````````````` + + +```````````````````````````````` example +Foo bar +# baz +Bar foo +. +

Foo bar

+

baz

+

Bar foo

+```````````````````````````````` + + +ATX headings can be empty: + +```````````````````````````````` example +## +# +### ### +. +

+

+

+```````````````````````````````` + + +## Setext headings + +A [setext heading](@) consists of one or more +lines of text, each containing at least one [non-whitespace +character], with no more than 3 spaces indentation, followed by +a [setext heading underline]. The lines of text must be such +that, were they not followed by the setext heading underline, +they would be interpreted as a paragraph: they cannot be +interpretable as a [code fence], [ATX heading][ATX headings], +[block quote][block quotes], [thematic break][thematic breaks], +[list item][list items], or [HTML block][HTML blocks]. + +A [setext heading underline](@) is a sequence of +`=` characters or a sequence of `-` characters, with no more than 3 +spaces indentation and any number of trailing spaces. If a line +containing a single `-` can be interpreted as an +empty [list item][list items], it should be interpreted this way +and not as a [setext heading underline]. + +The heading is a level 1 heading if `=` characters are used in +the [setext heading underline], and a level 2 heading if `-` +characters are used. The contents of the heading are the result +of parsing the preceding lines of text as CommonMark inline +content. + +In general, a setext heading need not be preceded or followed by a +blank line. However, it cannot interrupt a paragraph, so when a +setext heading comes after a paragraph, a blank line is needed between +them. + +Simple examples: + +```````````````````````````````` example +Foo *bar* +========= + +Foo *bar* +--------- +. +

Foo bar

+

Foo bar

+```````````````````````````````` + + +The content of the heading may span more than one line: + +```````````````````````````````` example +Foo *bar +baz* +==== +. +

Foo bar +baz

+```````````````````````````````` + + +The underlining can be any length: + +```````````````````````````````` example +Foo +------------------------- + +Foo += +. +

Foo

+

Foo

+```````````````````````````````` + + +The heading content can be indented up to three spaces, and need +not line up with the underlining: + +```````````````````````````````` example + Foo +--- + + Foo +----- + + Foo + === +. +

Foo

+

Foo

+

Foo

+```````````````````````````````` + + +Four spaces indent is too much: + +```````````````````````````````` example + Foo + --- + + Foo +--- +. +
Foo
+---
+
+Foo
+
+
+```````````````````````````````` + + +The setext heading underline can be indented up to three spaces, and +may have trailing spaces: + +```````````````````````````````` example +Foo + ---- +. +

Foo

+```````````````````````````````` + + +Four spaces is too much: + +```````````````````````````````` example +Foo + --- +. +

Foo +---

+```````````````````````````````` + + +The setext heading underline cannot contain internal spaces: + +```````````````````````````````` example +Foo += = + +Foo +--- - +. +

Foo += =

+

Foo

+
+```````````````````````````````` + + +Trailing spaces in the content line do not cause a line break: + +```````````````````````````````` example +Foo +----- +. +

Foo

+```````````````````````````````` + + +Nor does a backslash at the end: + +```````````````````````````````` example +Foo\ +---- +. +

Foo\

+```````````````````````````````` + + +Since indicators of block structure take precedence over +indicators of inline structure, the following are setext headings: + +```````````````````````````````` example +`Foo +---- +` + + +. +

`Foo

+

`

+

<a title="a lot

+

of dashes"/>

+```````````````````````````````` + + +The setext heading underline cannot be a [lazy continuation +line] in a list item or block quote: + +```````````````````````````````` example +> Foo +--- +. +
+

Foo

+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +bar +=== +. +
+

foo +bar +===

+
+```````````````````````````````` + + +```````````````````````````````` example +- Foo +--- +. +
    +
  • Foo
  • +
+
+```````````````````````````````` + + +A blank line is needed between a paragraph and a following +setext heading, since otherwise the paragraph becomes part +of the heading's content: + +```````````````````````````````` example +Foo +Bar +--- +. +

Foo +Bar

+```````````````````````````````` + + +But in general a blank line is not required before or after +setext headings: + +```````````````````````````````` example +--- +Foo +--- +Bar +--- +Baz +. +
+

Foo

+

Bar

+

Baz

+```````````````````````````````` + + +Setext headings cannot be empty: + +```````````````````````````````` example + +==== +. +

====

+```````````````````````````````` + + +Setext heading text lines must not be interpretable as block +constructs other than paragraphs. So, the line of dashes +in these examples gets interpreted as a thematic break: + +```````````````````````````````` example +--- +--- +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +- foo +----- +. +
    +
  • foo
  • +
+
+```````````````````````````````` + + +```````````````````````````````` example + foo +--- +. +
foo
+
+
+```````````````````````````````` + + +```````````````````````````````` example +> foo +----- +. +
+

foo

+
+
+```````````````````````````````` + + +If you want a heading with `> foo` as its literal text, you can +use backslash escapes: + +```````````````````````````````` example +\> foo +------ +. +

> foo

+```````````````````````````````` + + +**Compatibility note:** Most existing Markdown implementations +do not allow the text of setext headings to span multiple lines. +But there is no consensus about how to interpret + +``` markdown +Foo +bar +--- +baz +``` + +One can find four different interpretations: + +1. paragraph "Foo", heading "bar", paragraph "baz" +2. paragraph "Foo bar", thematic break, paragraph "baz" +3. paragraph "Foo bar --- baz" +4. heading "Foo bar", paragraph "baz" + +We find interpretation 4 most natural, and interpretation 4 +increases the expressive power of CommonMark, by allowing +multiline headings. Authors who want interpretation 1 can +put a blank line after the first paragraph: + +```````````````````````````````` example +Foo + +bar +--- +baz +. +

Foo

+

bar

+

baz

+```````````````````````````````` + + +Authors who want interpretation 2 can put blank lines around +the thematic break, + +```````````````````````````````` example +Foo +bar + +--- + +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +or use a thematic break that cannot count as a [setext heading +underline], such as + +```````````````````````````````` example +Foo +bar +* * * +baz +. +

Foo +bar

+
+

baz

+```````````````````````````````` + + +Authors who want interpretation 3 can use backslash escapes: + +```````````````````````````````` example +Foo +bar +\--- +baz +. +

Foo +bar +--- +baz

+```````````````````````````````` + + +## Indented code blocks + +An [indented code block](@) is composed of one or more +[indented chunks] separated by blank lines. +An [indented chunk](@) is a sequence of non-blank lines, +each indented four or more spaces. The contents of the code block are +the literal contents of the lines, including trailing +[line endings], minus four spaces of indentation. +An indented code block has no [info string]. + +An indented code block cannot interrupt a paragraph, so there must be +a blank line between a paragraph and a following indented code block. +(A blank line is not needed, however, between a code block and a following +paragraph.) + +```````````````````````````````` example + a simple + indented code block +. +
a simple
+  indented code block
+
+```````````````````````````````` + + +If there is any ambiguity between an interpretation of indentation +as a code block and as indicating that material belongs to a [list +item][list items], the list item interpretation takes precedence: + +```````````````````````````````` example + - foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo + + - bar +. +
    +
  1. +

    foo

    +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + + +The contents of a code block are literal text, and do not get parsed +as Markdown: + +```````````````````````````````` example +
+ *hi* + + - one +. +
<a/>
+*hi*
+
+- one
+
+```````````````````````````````` + + +Here we have three chunks separated by blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 + + + + chunk3 +. +
chunk1
+
+chunk2
+
+
+
+chunk3
+
+```````````````````````````````` + + +Any initial spaces beyond four will be included in the content, even +in interior blank lines: + +```````````````````````````````` example + chunk1 + + chunk2 +. +
chunk1
+  
+  chunk2
+
+```````````````````````````````` + + +An indented code block cannot interrupt a paragraph. (This +allows hanging indents and the like.) + +```````````````````````````````` example +Foo + bar + +. +

Foo +bar

+```````````````````````````````` + + +However, any non-blank line with fewer than four leading spaces ends +the code block immediately. So a paragraph may occur immediately +after indented code: + +```````````````````````````````` example + foo +bar +. +
foo
+
+

bar

+```````````````````````````````` + + +And indented code can occur immediately before and after other kinds of +blocks: + +```````````````````````````````` example +# Heading + foo +Heading +------ + foo +---- +. +

Heading

+
foo
+
+

Heading

+
foo
+
+
+```````````````````````````````` + + +The first line can be indented more than four spaces: + +```````````````````````````````` example + foo + bar +. +
    foo
+bar
+
+```````````````````````````````` + + +Blank lines preceding or following an indented code block +are not included in it: + +```````````````````````````````` example + + + foo + + +. +
foo
+
+```````````````````````````````` + + +Trailing spaces are included in the code block's content: + +```````````````````````````````` example + foo +. +
foo  
+
+```````````````````````````````` + + + +## Fenced code blocks + +A [code fence](@) is a sequence +of at least three consecutive backtick characters (`` ` ``) or +tildes (`~`). (Tildes and backticks cannot be mixed.) +A [fenced code block](@) +begins with a code fence, indented no more than three spaces. + +The line with the opening code fence may optionally contain some text +following the code fence; this is trimmed of leading and trailing +spaces and called the [info string](@). +The [info string] may not contain any backtick +characters. (The reason for this restriction is that otherwise +some inline code would be incorrectly interpreted as the +beginning of a fenced code block.) + +The content of the code block consists of all subsequent lines, until +a closing [code fence] of the same type as the code block +began with (backticks or tildes), and with at least as many backticks +or tildes as the opening code fence. If the leading code fence is +indented N spaces, then up to N spaces of indentation are removed from +each line of the content (if present). (If a content line is not +indented, it is preserved unchanged. If it is indented less than N +spaces, all of the indentation is removed.) + +The closing code fence may be indented up to three spaces, and may be +followed only by spaces, which are ignored. If the end of the +containing block (or document) is reached and no closing code fence +has been found, the code block contains all of the lines after the +opening code fence until the end of the containing block (or +document). (An alternative spec would require backtracking in the +event that a closing code fence is not found. But this makes parsing +much less efficient, and there seems to be no real downside to the +behavior described here.) + +A fenced code block may interrupt a paragraph, and does not require +a blank line either before or after. + +The content of a fenced code block is treated as literal text, not parsed +as inlines. 
The first word of the [info string] is typically used to +specify the language of the code sample, and rendered in the `class` +attribute of the `code` tag. However, this spec does not mandate any +particular treatment of the [info string]. + +Here is a simple example with backticks: + +```````````````````````````````` example +``` +< + > +``` +. +
<
+ >
+
+```````````````````````````````` + + +With tildes: + +```````````````````````````````` example +~~~ +< + > +~~~ +. +
<
+ >
+
+```````````````````````````````` + + +The closing code fence must use the same character as the opening +fence: + +```````````````````````````````` example +``` +aaa +~~~ +``` +. +
aaa
+~~~
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +aaa +``` +~~~ +. +
aaa
+```
+
+```````````````````````````````` + + +The closing code fence must be at least as long as the opening fence: + +```````````````````````````````` example +```` +aaa +``` +`````` +. +
aaa
+```
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ +aaa +~~~ +~~~~ +. +
aaa
+~~~
+
+```````````````````````````````` + + +Unclosed code blocks are closed by the end of the document +(or the enclosing [block quote][block quotes] or [list item][list items]): + +```````````````````````````````` example +``` +. +
+```````````````````````````````` + + +```````````````````````````````` example +````` + +``` +aaa +. +

+```
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +> aaa + +bbb +. +
+
aaa
+
+
+

bbb

+```````````````````````````````` + + +A code block can have all empty lines as its content: + +```````````````````````````````` example +``` + + +``` +. +

+  
+
+```````````````````````````````` + + +A code block can be empty: + +```````````````````````````````` example +``` +``` +. +
+```````````````````````````````` + + +Fences can be indented. If the opening fence is indented, +content lines will have equivalent opening indentation removed, +if present: + +```````````````````````````````` example + ``` + aaa +aaa +``` +. +
aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + aaa +aaa + ``` +. +
aaa
+aaa
+aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` + aaa + aaa + aaa + ``` +. +
aaa
+ aaa
+aaa
+
+```````````````````````````````` + + +Four spaces indentation produces an indented code block: + +```````````````````````````````` example + ``` + aaa + ``` +. +
```
+aaa
+```
+
+```````````````````````````````` + + +Closing fences may be indented by 0-3 spaces, and their indentation +need not match that of the opening fence: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +```````````````````````````````` example + ``` +aaa + ``` +. +
aaa
+
+```````````````````````````````` + + +This is not a closing fence, because it is indented 4 spaces: + +```````````````````````````````` example +``` +aaa + ``` +. +
aaa
+    ```
+
+```````````````````````````````` + + + +Code fences (opening and closing) cannot contain internal spaces: + +```````````````````````````````` example +``` ``` +aaa +. +

+aaa

+```````````````````````````````` + + +```````````````````````````````` example +~~~~~~ +aaa +~~~ ~~ +. +
aaa
+~~~ ~~
+
+```````````````````````````````` + + +Fenced code blocks can interrupt paragraphs, and can be followed +directly by paragraphs, without a blank line between: + +```````````````````````````````` example +foo +``` +bar +``` +baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +Other blocks can also occur before and after fenced code blocks +without an intervening blank line: + +```````````````````````````````` example +foo +--- +~~~ +bar +~~~ +# baz +. +

foo

+
bar
+
+

baz

+```````````````````````````````` + + +An [info string] can be provided after the opening code fence. +Opening and closing spaces will be stripped, and the first word, prefixed +with `language-`, is used as the value for the `class` attribute of the +`code` element within the enclosing `pre` element. + +```````````````````````````````` example +```ruby +def foo(x) + return 3 +end +``` +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ +. +
def foo(x)
+  return 3
+end
+
+```````````````````````````````` + + +```````````````````````````````` example +````; +```` +. +
+```````````````````````````````` + + +[Info strings] for backtick code blocks cannot contain backticks: + +```````````````````````````````` example +``` aa ``` +foo +. +

aa +foo

+```````````````````````````````` + + +Closing code fences cannot have [info strings]: + +```````````````````````````````` example +``` +``` aaa +``` +. +
``` aaa
+
+```````````````````````````````` + + + +## HTML blocks + +An [HTML block](@) is a group of lines that is treated +as raw HTML (and will not be escaped in HTML output). + +There are seven kinds of [HTML block], which can be defined +by their start and end conditions. The block begins with a line that +meets a [start condition](@) (after up to three spaces of +optional indentation). It ends with the first subsequent line that +meets a matching [end condition](@), or the last line of +the document (or other [container block]), if no line is encountered that meets the +[end condition]. If the first line meets both the [start condition] +and the [end condition], the block will contain just that line. + +1. **Start condition:** line begins with the string ``, or the end of the line.\ +**End condition:** line contains an end tag +``, ``, or `` (case-insensitive; it +need not match the start tag). + +2. **Start condition:** line begins with the string ``. + +3. **Start condition:** line begins with the string ``. + +4. **Start condition:** line begins with the string ``. + +5. **Start condition:** line begins with the string +``. + +6. **Start condition:** line begins with the string `<` or ``, or +the string `/>`.\ +**End condition:** line is followed by a [blank line]. + +7. **Start condition:** line begins with a complete [open tag] +or [closing tag] (with any [tag name] other than `script`, +`style`, or `pre`) followed only by [whitespace] +or the end of the line.\ +**End condition:** line is followed by a [blank line]. + +All types of [HTML blocks] except type 7 may interrupt +a paragraph. Blocks of type 7 may not interrupt a paragraph. +(This restriction is intended to prevent unwanted interpretation +of long tags inside a wrapped paragraph as starting HTML blocks.) + +Some simple examples follow. Here are some basic HTML blocks +of type 6: + +```````````````````````````````` example + + + + +
+ hi +
+ +okay. +. + + + + +
+ hi +
+

okay.

+```````````````````````````````` + + +```````````````````````````````` example +
+ *hello* + +. +
+ *hello* + +```````````````````````````````` + + +A block can also start with a closing tag: + +```````````````````````````````` example +
+*foo* +. +
+*foo* +```````````````````````````````` + + +Here we have two HTML blocks with a Markdown paragraph between them: + +```````````````````````````````` example +
+ +*Markdown* + +
+. +
+

Markdown

+
+```````````````````````````````` + + +The tag on the first line can be partial, as long +as it is split where there would be whitespace: + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +```````````````````````````````` example +
+
+. +
+
+```````````````````````````````` + + +An open tag need not be closed: +```````````````````````````````` example +
+*foo* + +*bar* +. +
+*foo* +

bar

+```````````````````````````````` + + + +A partial tag need not even be completed (garbage +in, garbage out): + +```````````````````````````````` example +
+. +
*foo*
+```````````````````````````````` + + +```````````````````````````````` example +
+foo +
+. +
+foo +
+```````````````````````````````` + + +Everything until the next blank line or end of document +gets included in the HTML block. So, in the following +example, what looks like a Markdown code block +is actually part of the HTML block, which continues until a blank +line or the end of the document is reached: + +```````````````````````````````` example +
+``` c +int x = 33; +``` +. +
+``` c +int x = 33; +``` +```````````````````````````````` + + +To start an [HTML block] with a tag that is *not* in the +list of block-level tags in (6), you must put the tag by +itself on the first line (and it must be complete): + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +In type 7 blocks, the [tag name] can be anything: + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* + +. + +*bar* + +```````````````````````````````` + + +```````````````````````````````` example + +*bar* +. + +*bar* +```````````````````````````````` + + +These rules are designed to allow us to work with tags that +can function as either block-level or inline-level tags. +The `` tag is a nice example. We can surround content with +`` tags in three different ways. In this case, we get a raw +HTML block, because the `` tag is on a line by itself: + +```````````````````````````````` example + +*foo* + +. + +*foo* + +```````````````````````````````` + + +In this case, we get a raw HTML block that just includes +the `` tag (because it ends with the following blank +line). So the contents get interpreted as CommonMark: + +```````````````````````````````` example + + +*foo* + + +. + +

foo

+
+```````````````````````````````` + + +Finally, in this case, the `` tags are interpreted +as [raw HTML] *inside* the CommonMark paragraph. (Because +the tag is not on a line by itself, we get inline HTML +rather than an [HTML block].) + +```````````````````````````````` example +*foo* +. +

foo

+```````````````````````````````` + + +HTML tags designed to contain literal content +(`script`, `style`, `pre`), comments, processing instructions, +and declarations are treated somewhat differently. +Instead of ending at the first blank line, these blocks +end at the first line containing a corresponding end tag. +As a result, these blocks can contain blank lines: + +A pre tag (type 1): + +```````````````````````````````` example +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+okay +. +

+import Text.HTML.TagSoup
+
+main :: IO ()
+main = print $ parseTags tags
+
+

okay

+```````````````````````````````` + + +A script tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +A style tag (type 1): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +If there is no matching end tag, the block will end at the +end of the document (or the enclosing [block quote][block quotes] +or [list item][list items]): + +```````````````````````````````` example + +*foo* +. + +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*bar* +*baz* +. +*bar* +

baz

+```````````````````````````````` + + +Note that anything on the last line after the +end tag will be included in the [HTML block]: + +```````````````````````````````` example +1. *bar* +. +1. *bar* +```````````````````````````````` + + +A comment (type 2): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + + +A processing instruction (type 3): + +```````````````````````````````` example +'; + +?> +okay +. +'; + +?> +

okay

+```````````````````````````````` + + +A declaration (type 4): + +```````````````````````````````` example + +. + +```````````````````````````````` + + +CDATA (type 5): + +```````````````````````````````` example + +okay +. + +

okay

+```````````````````````````````` + + +The opening tag can be indented 1-3 spaces, but not 4: + +```````````````````````````````` example + + + +. + +
<!-- foo -->
+
+```````````````````````````````` + + +```````````````````````````````` example +
+ +
+. +
+
<div>
+
+```````````````````````````````` + + +An HTML block of types 1--6 can interrupt a paragraph, and need not be +preceded by a blank line. + +```````````````````````````````` example +Foo +
+bar +
+. +

Foo

+
+bar +
+```````````````````````````````` + + +However, a following blank line is needed, except at the end of +a document, and except for blocks of types 1--5, above: + +```````````````````````````````` example +
+bar +
+*foo* +. +
+bar +
+*foo* +```````````````````````````````` + + +HTML blocks of type 7 cannot interrupt a paragraph: + +```````````````````````````````` example +Foo + +baz +. +

Foo + +baz

+```````````````````````````````` + + +This rule differs from John Gruber's original Markdown syntax +specification, which says: + +> The only restrictions are that block-level HTML elements — +> e.g. `<div>`, `<table>`, `<pre>`, `<p>`, etc. — must be separated from +> surrounding content by blank lines, and the start and end tags of the +> block should not be indented with tabs or spaces. + +In some ways Gruber's rule is more restrictive than the one given +here: + +- It requires that an HTML block be preceded by a blank line. +- It does not allow the start tag to be indented. +- It requires a matching end tag, which it also does not allow to + be indented. + +Most Markdown implementations (including some of Gruber's own) do not +respect all of these restrictions. + +There is one respect, however, in which Gruber's rule is more liberal +than the one given here, since it allows blank lines to occur inside +an HTML block. There are two reasons for disallowing them here. +First, it removes the need to parse balanced tags, which is +expensive and can require backtracking from the end of the document +if no matching end tag is found. Second, it provides a very simple +and flexible way of including Markdown content inside HTML tags: +simply separate the Markdown from the HTML using blank lines: + +Compare: + +```````````````````````````````` example +

+ +*Emphasized* text. + +
+. +
+

Emphasized text.

+
+```````````````````````````````` + + +```````````````````````````````` example +
+*Emphasized* text. +
+. +
+*Emphasized* text. +
+```````````````````````````````` + + +Some Markdown implementations have adopted a convention of +interpreting content inside tags as text if the open tag has +the attribute `markdown=1`. The rule given above seems a simpler and +more elegant way of achieving the same expressive power, which is also +much simpler to parse. + +The main potential drawback is that one can no longer paste HTML +blocks into Markdown documents with 100% reliability. However, +*in most cases* this will work fine, because the blank lines in +HTML are usually followed by HTML block tags. For example: + +```````````````````````````````` example +
+ + + + + + + +
+Hi +
+. + + + + +
+Hi +
+```````````````````````````````` + + +There are problems, however, if the inner tags are indented +*and* separated by spaces, as then they will be interpreted as +an indented code block: + +```````````````````````````````` example + + + + + + + + +
+ Hi +
+. + + +
<td>
+  Hi
+</td>
+
+ +
+```````````````````````````````` + + +Fortunately, blank lines are usually not necessary and can be +deleted. The exception is inside `<pre>` tags, but as described
+above, raw HTML blocks starting with `<pre>` *can* contain blank
+lines.
+
+## Link reference definitions
+
+A [link reference definition](@)
+consists of a [link label], indented up to three spaces, followed
+by a colon (`:`), optional [whitespace] (including up to one
+[line ending]), a [link destination],
+optional [whitespace] (including up to one
+[line ending]), and an optional [link
+title], which if it is present must be separated
+from the [link destination] by [whitespace].
+No further [non-whitespace characters] may occur on the line.
+
+A [link reference definition]
+does not correspond to a structural element of a document.  Instead, it
+defines a label which can be used in [reference links]
+and reference-style [images] elsewhere in the document.  [Link
+reference definitions] can come either before or after the links that use
+them.
+
+```````````````````````````````` example
+[foo]: /url "title"
+
+[foo]
+.
+

foo

+```````````````````````````````` + + +```````````````````````````````` example + [foo]: + /url + 'the title' + +[foo] +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] +. +

Foo*bar]

+```````````````````````````````` + + +```````````````````````````````` example +[Foo bar]: + +'title' + +[Foo bar] +. +

Foo bar

+```````````````````````````````` + + +The title may extend over multiple lines: + +```````````````````````````````` example +[foo]: /url ' +title +line1 +line2 +' + +[foo] +. +

foo

+```````````````````````````````` + + +However, it may not contain a [blank line]: + +```````````````````````````````` example +[foo]: /url 'title + +with blank line' + +[foo] +. +

[foo]: /url 'title

+

with blank line'

+

[foo]

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[foo]: +/url + +[foo] +. +

foo

+```````````````````````````````` + + +The link destination may not be omitted: + +```````````````````````````````` example +[foo]: + +[foo] +. +

[foo]:

+

[foo]

+```````````````````````````````` + + +Both title and destination can contain backslash escapes +and literal backslashes: + +```````````````````````````````` example +[foo]: /url\bar\*baz "foo\"bar\baz" + +[foo] +. +

foo

+```````````````````````````````` + + +A link can come before its corresponding definition: + +```````````````````````````````` example +[foo] + +[foo]: url +. +

foo

+```````````````````````````````` + + +If there are several matching definitions, the first one takes +precedence: + +```````````````````````````````` example +[foo] + +[foo]: first +[foo]: second +. +

foo

+```````````````````````````````` + + +As noted in the section on [Links], matching of labels is +case-insensitive (see [matches]). + +```````````````````````````````` example +[FOO]: /url + +[Foo] +. +

Foo

+```````````````````````````````` + + +```````````````````````````````` example +[ΑΓΩ]: /φου + +[αγω] +. +

αγω

+```````````````````````````````` + + +Here is a link reference definition with no corresponding link. +It contributes nothing to the document. + +```````````````````````````````` example +[foo]: /url +. +```````````````````````````````` + + +Here is another one: + +```````````````````````````````` example +[ +foo +]: /url +bar +. +

bar

+```````````````````````````````` + + +This is not a link reference definition, because there are +[non-whitespace characters] after the title: + +```````````````````````````````` example +[foo]: /url "title" ok +. +

[foo]: /url "title" ok

+```````````````````````````````` + + +This is a link reference definition, but it has no title: + +```````````````````````````````` example +[foo]: /url +"title" ok +. +

"title" ok

+```````````````````````````````` + + +This is not a link reference definition, because it is indented +four spaces: + +```````````````````````````````` example + [foo]: /url "title" + +[foo] +. +
[foo]: /url "title"
+
+

[foo]

+```````````````````````````````` + + +This is not a link reference definition, because it occurs inside +a code block: + +```````````````````````````````` example +``` +[foo]: /url +``` + +[foo] +. +
[foo]: /url
+
+

[foo]

+```````````````````````````````` + + +A [link reference definition] cannot interrupt a paragraph. + +```````````````````````````````` example +Foo +[bar]: /baz + +[bar] +. +

Foo +[bar]: /baz

+

[bar]

+```````````````````````````````` + + +However, it can directly follow other block elements, such as headings +and thematic breaks, and it need not be followed by a blank line. + +```````````````````````````````` example +# [Foo] +[foo]: /url +> bar +. +

Foo

+
+

bar

+
+```````````````````````````````` + + +Several [link reference definitions] +can occur one after another, without intervening blank lines. + +```````````````````````````````` example +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] +. +

foo, +bar, +baz

+```````````````````````````````` + + +[Link reference definitions] can occur +inside block containers, like lists and block quotations. They +affect the entire document, not just the container in which they +are defined: + +```````````````````````````````` example +[foo] + +> [foo]: /url +. +

foo

+
+
+```````````````````````````````` + + + +## Paragraphs + +A sequence of non-blank lines that cannot be interpreted as other +kinds of blocks forms a [paragraph](@). +The contents of the paragraph are the result of parsing the +paragraph's raw content as inlines. The paragraph's raw content +is formed by concatenating the lines and removing initial and final +[whitespace]. + +A simple example with two paragraphs: + +```````````````````````````````` example +aaa + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Paragraphs can contain multiple lines, but no blank lines: + +```````````````````````````````` example +aaa +bbb + +ccc +ddd +. +

aaa +bbb

+

ccc +ddd

+```````````````````````````````` + + +Multiple blank lines between paragraphs have no effect: + +```````````````````````````````` example +aaa + + +bbb +. +

aaa

+

bbb

+```````````````````````````````` + + +Leading spaces are skipped: + +```````````````````````````````` example + aaa + bbb +. +

aaa +bbb

+```````````````````````````````` + + +Lines after the first may be indented any amount, since indented +code blocks cannot interrupt paragraphs. + +```````````````````````````````` example +aaa + bbb + ccc +. +

aaa +bbb +ccc

+```````````````````````````````` + + +However, the first line may be indented at most three spaces, +or an indented code block will be triggered: + +```````````````````````````````` example + aaa +bbb +. +

aaa +bbb

+```````````````````````````````` + + +```````````````````````````````` example + aaa +bbb +. +
aaa
+
+

bbb

+```````````````````````````````` + + +Final spaces are stripped before inline parsing, so a paragraph +that ends with two or more spaces will not end with a [hard line +break]: + +```````````````````````````````` example +aaa +bbb +. +

aaa
+bbb

+```````````````````````````````` + + +## Blank lines + +[Blank lines] between block-level elements are ignored, +except for the role they play in determining whether a [list] +is [tight] or [loose]. + +Blank lines at the beginning and end of the document are also ignored. + +```````````````````````````````` example + + +aaa + + +# aaa + + +. +

aaa

+

aaa

+```````````````````````````````` + + + +# Container blocks + +A [container block] is a block that has other +blocks as its contents. There are two basic kinds of container blocks: +[block quotes] and [list items]. +[Lists] are meta-containers for [list items]. + +We define the syntax for container blocks recursively. The general +form of the definition is: + +> If X is a sequence of blocks, then the result of +> transforming X in such-and-such a way is a container of type Y +> with these blocks as its content. + +So, we explain what counts as a block quote or list item by explaining +how these can be *generated* from their contents. This should suffice +to define the syntax, although it does not give a recipe for *parsing* +these constructions. (A recipe is provided below in the section entitled +[A parsing strategy](#appendix-a-parsing-strategy).) + +## Block quotes + +A [block quote marker](@) +consists of 0-3 spaces of initial indent, plus (a) the character `>` together +with a following space, or (b) a single character `>` not followed by a space. + +The following rules define [block quotes]: + +1. **Basic case.** If a string of lines *Ls* constitute a sequence + of blocks *Bs*, then the result of prepending a [block quote + marker] to the beginning of each line in *Ls* + is a [block quote](#block-quotes) containing *Bs*. + +2. **Laziness.** If a string of lines *Ls* constitute a [block + quote](#block-quotes) with contents *Bs*, then the result of deleting + the initial [block quote marker] from one or + more lines in which the next [non-whitespace character] after the [block + quote marker] is [paragraph continuation + text] is a block quote with *Bs* as its content. + [Paragraph continuation text](@) is text + that will be parsed as part of the content of a paragraph, but does + not occur at the beginning of the paragraph. + +3. **Consecutiveness.** A document cannot contain two [block + quotes] in a row unless there is a [blank line] between them. 
+ +Nothing else counts as a [block quote](#block-quotes). + +Here is a simple example: + +```````````````````````````````` example +> # Foo +> bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The spaces after the `>` characters can be omitted: + +```````````````````````````````` example +># Foo +>bar +> baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +The `>` characters can be indented 1-3 spaces: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +Four spaces gives us a code block: + +```````````````````````````````` example + > # Foo + > bar + > baz +. +
> # Foo
+> bar
+> baz
+
+```````````````````````````````` + + +The Laziness clause allows us to omit the `>` before +[paragraph continuation text]: + +```````````````````````````````` example +> # Foo +> bar +baz +. +
+

Foo

+

bar +baz

+
+```````````````````````````````` + + +A block quote can contain some lazy and some non-lazy +continuation lines: + +```````````````````````````````` example +> bar +baz +> foo +. +
+

bar +baz +foo

+
+```````````````````````````````` + + +Laziness only applies to lines that would have been continuations of +paragraphs had they been prepended with [block quote markers]. +For example, the `> ` cannot be omitted in the second line of + +``` markdown +> foo +> --- +``` + +without changing the meaning: + +```````````````````````````````` example +> foo +--- +. +
+

foo

+
+
+```````````````````````````````` + + +Similarly, if we omit the `> ` in the second line of + +``` markdown +> - foo +> - bar +``` + +then the block quote ends after the first line: + +```````````````````````````````` example +> - foo +- bar +. +
+
    +
  • foo
  • +
+
+
    +
  • bar
  • +
+```````````````````````````````` + + +For the same reason, we can't omit the `> ` in front of +subsequent lines of an indented or fenced code block: + +```````````````````````````````` example +> foo + bar +. +
+
foo
+
+
+
bar
+
+```````````````````````````````` + + +```````````````````````````````` example +> ``` +foo +``` +. +
+
+
+

foo

+
+```````````````````````````````` + + +Note that in the following case, we have a [lazy +continuation line]: + +```````````````````````````````` example +> foo + - bar +. +
+

foo +- bar

+
+```````````````````````````````` + + +To see why, note that in + +```markdown +> foo +> - bar +``` + +the `- bar` is indented too far to start a list, and can't +be an indented code block because indented code blocks cannot +interrupt paragraphs, so it is [paragraph continuation text]. + +A block quote can be empty: + +```````````````````````````````` example +> +. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> +> +> +. +
+
+```````````````````````````````` + + +A block quote can have initial or final blank lines: + +```````````````````````````````` example +> +> foo +> +. +
+

foo

+
+```````````````````````````````` + + +A blank line always separates block quotes: + +```````````````````````````````` example +> foo + +> bar +. +
+

foo

+
+
+

bar

+
+```````````````````````````````` + + +(Most current Markdown implementations, including John Gruber's +original `Markdown.pl`, will parse this example as a single block quote +with two paragraphs. But it seems better to allow the author to decide +whether two block quotes or one are wanted.) + +Consecutiveness means that if we put these block quotes together, +we get a single block quote: + +```````````````````````````````` example +> foo +> bar +. +
+

foo +bar

+
+```````````````````````````````` + + +To get a block quote with two paragraphs, use: + +```````````````````````````````` example +> foo +> +> bar +. +
+

foo

+

bar

+
+```````````````````````````````` + + +Block quotes can interrupt paragraphs: + +```````````````````````````````` example +foo +> bar +. +

foo

+
+

bar

+
+```````````````````````````````` + + +In general, blank lines are not needed before or after block +quotes: + +```````````````````````````````` example +> aaa +*** +> bbb +. +
+

aaa

+
+
+
+

bbb

+
+```````````````````````````````` + + +However, because of laziness, a blank line is needed between +a block quote and a following paragraph: + +```````````````````````````````` example +> bar +baz +. +
+

bar +baz

+
+```````````````````````````````` + + +```````````````````````````````` example +> bar + +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +```````````````````````````````` example +> bar +> +baz +. +
+

bar

+
+

baz

+```````````````````````````````` + + +It is a consequence of the Laziness rule that any number +of initial `>`s may be omitted on a continuation line of a +nested block quote: + +```````````````````````````````` example +> > > foo +bar +. +
+
+
+

foo +bar

+
+
+
+```````````````````````````````` + + +```````````````````````````````` example +>>> foo +> bar +>>baz +. +
+
+
+

foo +bar +baz

+
+
+
+```````````````````````````````` + + +When including an indented code block in a block quote, +remember that the [block quote marker] includes +both the `>` and a following space. So *five spaces* are needed after +the `>`: + +```````````````````````````````` example +> code + +> not code +. +
+
code
+
+
+
+

not code

+
+```````````````````````````````` + + + +## List items + +A [list marker](@) is a +[bullet list marker] or an [ordered list marker]. + +A [bullet list marker](@) +is a `-`, `+`, or `*` character. + +An [ordered list marker](@) +is a sequence of 1--9 arabic digits (`0-9`), followed by either a +`.` character or a `)` character. (The reason for the length +limit is that with 10 digits we start seeing integer overflows +in some browsers.) + +The following rules define [list items]: + +1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of + blocks *Bs* starting with a [non-whitespace character] and not separated + from each other by more than one blank line, and *M* is a list + marker of width *W* followed by 1 ≤ *N* ≤ 4 spaces, then the result + of prepending *M* and the following spaces to the first line of + *Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a + list item with *Bs* as its contents. The type of the list item + (bullet or ordered) is determined by the type of its list marker. + If the list item is ordered, then it is also assigned a start + number, based on the ordered list marker. + + Exceptions: When the first list item in a [list] interrupts + a paragraph---that is, when it starts on a line that would + otherwise count as [paragraph continuation text]---then (a) + the lines *Ls* must not begin with a blank line, and (b) if + the list item is ordered, the start number must be 1. + +For example, let *Ls* be the lines + +```````````````````````````````` example +A paragraph +with two lines. + + indented code + +> A block quote. +. +

A paragraph +with two lines.

+
indented code
+
+
+

A block quote.

+
+```````````````````````````````` + + +And let *M* be the marker `1.`, and *N* = 2. Then rule #1 says +that the following is an ordered list item with start number 1, +and the same contents as *Ls*: + +```````````````````````````````` example +1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +The most important thing to notice is that the position of +the text after the list marker determines how much indentation +is needed in subsequent blocks in the list item. If the list +marker takes up two spaces, and there are three spaces between +the list marker and the next [non-whitespace character], then blocks +must be indented five spaces in order to fall under the list +item. + +Here are some examples showing how far content must be indented to be +put under the list item: + +```````````````````````````````` example +- one + + two +. +
    +
  • one
  • +
+

two

+```````````````````````````````` + + +```````````````````````````````` example +- one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • one
  • +
+
 two
+
+```````````````````````````````` + + +```````````````````````````````` example + - one + + two +. +
    +
  • +

    one

    +

    two

    +
  • +
+```````````````````````````````` + + +It is tempting to think of this in terms of columns: the continuation +blocks must be indented at least to the column of the first +[non-whitespace character] after the list marker. However, that is not quite right. +The spaces after the list marker determine how much relative indentation +is needed. Which column this indentation reaches will depend on +how the list item is embedded in other constructions, as shown by +this example: + +```````````````````````````````` example + > > 1. one +>> +>> two +. +
+
+
    +
  1. +

    one

    +

    two

    +
  2. +
+
+
+```````````````````````````````` + + +Here `two` occurs in the same column as the list marker `1.`, +but is actually contained in the list item, because there is +sufficient indentation after the last containing blockquote marker. + +The converse is also possible. In the following example, the word `two` +occurs far to the right of the initial text of the list item, `one`, but +it is not considered part of the list item, because it is not indented +far enough past the blockquote marker: + +```````````````````````````````` example +>>- one +>> + > > two +. +
+
+
    +
  • one
  • +
+

two

+
+
+```````````````````````````````` + + +Note that at least one space is needed between the list marker and +any following content, so these are not list items: + +```````````````````````````````` example +-one + +2.two +. +

-one

+

2.two

+```````````````````````````````` + + +A list item may contain blocks that are separated by more than +one blank line. + +```````````````````````````````` example +- foo + + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +A list item may contain any kind of block: + +```````````````````````````````` example +1. foo + + ``` + bar + ``` + + baz + + > bam +. +
    +
  1. +

    foo

    +
    bar
    +
    +

    baz

    +
    +

    bam

    +
    +
  2. +
+```````````````````````````````` + + +A list item that contains an indented code block will preserve +empty lines within the code block verbatim. + +```````````````````````````````` example +- Foo + + bar + + + baz +. +
    +
  • +

    Foo

    +
    bar
    +
    +
    +baz
    +
    +
  • +
+```````````````````````````````` + +Note that ordered list start numbers must be nine digits or less: + +```````````````````````````````` example +123456789. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +1234567890. not ok +. +

1234567890. not ok

+```````````````````````````````` + + +A start number may begin with 0s: + +```````````````````````````````` example +0. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +```````````````````````````````` example +003. ok +. +
    +
  1. ok
  2. +
+```````````````````````````````` + + +A start number may not be negative: + +```````````````````````````````` example +-1. not ok +. +

-1. not ok

+```````````````````````````````` + + + +2. **Item starting with indented code.** If a sequence of lines *Ls* + constitute a sequence of blocks *Bs* starting with an indented code + block and not separated from each other by more than one blank line, + and *M* is a list marker of width *W* followed by + one space, then the result of prepending *M* and the following + space to the first line of *Ls*, and indenting subsequent lines of + *Ls* by *W + 1* spaces, is a list item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +An indented code block will have to be indented four spaces beyond +the edge of the region where text will be included in the list item. +In the following case that is 6 spaces: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +
    bar
    +
    +
  • +
+```````````````````````````````` + + +And in this case it is 11 spaces: + +```````````````````````````````` example + 10. foo + + bar +. +
    +
  1. +

    foo

    +
    bar
    +
    +
  2. +
+```````````````````````````````` + + +If the *first* block in the list item is an indented code block, +then by rule #2, the contents must be indented *one* space after the +list marker: + +```````````````````````````````` example + indented code + +paragraph + + more code +. +
indented code
+
+

paragraph

+
more code
+
+```````````````````````````````` + + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
    indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that an additional space indent is interpreted as space +inside the code block: + +```````````````````````````````` example +1. indented code + + paragraph + + more code +. +
    +
  1. +
     indented code
    +
    +

    paragraph

    +
    more code
    +
    +
  2. +
+```````````````````````````````` + + +Note that rules #1 and #2 only apply to two cases: (a) cases +in which the lines to be included in a list item begin with a +[non-whitespace character], and (b) cases in which +they begin with an indented code +block. In a case like the following, where the first block begins with +a three-space indent, the rules do not allow us to form a list item by +indenting the whole thing and prepending a list marker: + +```````````````````````````````` example + foo + +bar +. +

foo

+

bar

+```````````````````````````````` + + +```````````````````````````````` example +- foo + + bar +. +
    +
  • foo
  • +
+

bar

+```````````````````````````````` + + +This is not a significant restriction, because when a block begins +with 1-3 spaces indent, the indentation can always be removed without +a change in interpretation, allowing rule #1 to be applied. So, in +the above case: + +```````````````````````````````` example +- foo + + bar +. +
    +
  • +

    foo

    +

    bar

    +
  • +
+```````````````````````````````` + + +3. **Item starting with a blank line.** If a sequence of lines *Ls* + starting with a single [blank line] constitute a (possibly empty) + sequence of blocks *Bs*, not separated from each other by more than + one blank line, and *M* is a list marker of width *W*, + then the result of prepending *M* to the first line of *Ls*, and + indenting subsequent lines of *Ls* by *W + 1* spaces, is a list + item with *Bs* as its contents. + If a line is empty, then it need not be indented. The type of the + list item (bullet or ordered) is determined by the type of its list + marker. If the list item is ordered, then it is also assigned a + start number, based on the ordered list marker. + +Here are some list items that start with a blank line but are not empty: + +```````````````````````````````` example +- + foo +- + ``` + bar + ``` +- + baz +. +
    +
  • foo
  • +
  • +
    bar
    +
    +
  • +
  • +
    baz
    +
    +
  • +
+```````````````````````````````` + +When the list item starts with a blank line, the number of spaces +following the list marker doesn't change the required indentation: + +```````````````````````````````` example +- + foo +. +
    +
  • foo
  • +
+```````````````````````````````` + + +A list item can begin with at most one blank line. +In the following example, `foo` is not part of the list +item: + +```````````````````````````````` example +- + + foo +. +
    +
  • +
+

foo

+```````````````````````````````` + + +Here is an empty bullet list item: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +It does not matter whether there are spaces following the [list marker]: + +```````````````````````````````` example +- foo +- +- bar +. +
    +
  • foo
  • +
  • +
  • bar
  • +
+```````````````````````````````` + + +Here is an empty ordered list item: + +```````````````````````````````` example +1. foo +2. +3. bar +. +
    +
  1. foo
  2. +
  3. +
  4. bar
  5. +
+```````````````````````````````` + + +A list may start or end with an empty list item: + +```````````````````````````````` example +* +. +
    +
  • +
+```````````````````````````````` + +However, an empty list item cannot interrupt a paragraph: + +```````````````````````````````` example +foo +* + +foo +1. +. +

foo +*

+

foo +1.

+```````````````````````````````` + + +4. **Indentation.** If a sequence of lines *Ls* constitutes a list item + according to rule #1, #2, or #3, then the result of indenting each line + of *Ls* by 1-3 spaces (the same for each line) also constitutes a + list item with the same contents and attributes. If a line is + empty, then it need not be indented. + +Indented one space: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented two spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indented three spaces: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Four spaces indent gives a code block: + +```````````````````````````````` example + 1. A paragraph + with two lines. + + indented code + + > A block quote. +. +
1.  A paragraph
+    with two lines.
+
+        indented code
+
+    > A block quote.
+
+```````````````````````````````` + + + +5. **Laziness.** If a string of lines *Ls* constitute a [list + item](#list-items) with contents *Bs*, then the result of deleting + some or all of the indentation from one or more lines in which the + next [non-whitespace character] after the indentation is + [paragraph continuation text] is a + list item with the same contents and attributes. The unindented + lines are called + [lazy continuation line](@)s. + +Here is an example with [lazy continuation lines]: + +```````````````````````````````` example + 1. A paragraph +with two lines. + + indented code + + > A block quote. +. +
    +
  1. +

    A paragraph +with two lines.

    +
    indented code
    +
    +
    +

    A block quote.

    +
    +
  2. +
+```````````````````````````````` + + +Indentation can be partially deleted: + +```````````````````````````````` example + 1. A paragraph + with two lines. +. +
    +
  1. A paragraph +with two lines.
  2. +
+```````````````````````````````` + + +These examples show how laziness can work in nested structures: + +```````````````````````````````` example +> 1. > Blockquote +continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + +```````````````````````````````` example +> 1. > Blockquote +> continued here. +. +
+
    +
  1. +
    +

    Blockquote +continued here.

    +
    +
  2. +
+
+```````````````````````````````` + + + +6. **That's all.** Nothing that is not counted as a list item by rules + #1--5 counts as a [list item](#list-items). + +The rules for sublists follow from the general rules above. A sublist +must be indented the same number of spaces a paragraph would need to be +in order to be included in the list item. + +So, in this case we need two spaces indent: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo +
      +
    • bar +
        +
      • baz +
          +
        • boo
        • +
        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +One is not enough: + +```````````````````````````````` example +- foo + - bar + - baz + - boo +. +
    +
  • foo
  • +
  • bar
  • +
  • baz
  • +
  • boo
  • +
+```````````````````````````````` + + +Here we need four, because the list marker is wider: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo +
      +
    • bar
    • +
    +
  2. +
+```````````````````````````````` + + +Three is not enough: + +```````````````````````````````` example +10) foo + - bar +. +
    +
  1. foo
  2. +
+
    +
  • bar
  • +
+```````````````````````````````` + + +A list may be the first block in a list item: + +```````````````````````````````` example +- - foo +. +
    +
  • +
      +
    • foo
    • +
    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. - 2. foo +. +
    +
  1. +
      +
    • +
        +
      1. foo
      2. +
      +
    • +
    +
  2. +
+```````````````````````````````` + + +A list item can contain a heading: + +```````````````````````````````` example +- # Foo +- Bar + --- + baz +. +
    +
  • +

    Foo

    +
  • +
  • +

    Bar

    +baz
  • +
+```````````````````````````````` + + +### Motivation + +John Gruber's Markdown spec says the following about list items: + +1. "List markers typically start at the left margin, but may be indented + by up to three spaces. List markers must be followed by one or more + spaces or a tab." + +2. "To make lists look nice, you can wrap items with hanging indents.... + But if you don't want to, you don't have to." + +3. "List items may consist of multiple paragraphs. Each subsequent + paragraph in a list item must be indented by either 4 spaces or one + tab." + +4. "It looks nice if you indent every line of the subsequent paragraphs, + but here again, Markdown will allow you to be lazy." + +5. "To put a blockquote within a list item, the blockquote's `>` + delimiters need to be indented." + +6. "To put a code block within a list item, the code block needs to be + indented twice — 8 spaces or two tabs." + +These rules specify that a paragraph under a list item must be indented +four spaces (presumably, from the left margin, rather than the start of +the list marker, but this is not said), and that code under a list item +must be indented eight spaces instead of the usual four. They also say +that a block quote must be indented, but not by how much; however, the +example given has four spaces indentation. Although nothing is said +about other kinds of block-level content, it is certainly reasonable to +infer that *all* block elements under a list item, including other +lists, must be indented four spaces. This principle has been called the +*four-space rule*. + +The four-space rule is clear and principled, and if the reference +implementation `Markdown.pl` had followed it, it probably would have +become the standard. However, `Markdown.pl` allowed paragraphs and +sublists to start with only two spaces indentation, at least on the +outer level. 
Worse, its behavior was inconsistent: a sublist of an +outer-level list needed two spaces indentation, but a sublist of this +sublist needed three spaces. It is not surprising, then, that different +implementations of Markdown have developed very different rules for +determining what comes under a list item. (Pandoc and python-Markdown, +for example, stuck with Gruber's syntax description and the four-space +rule, while discount, redcarpet, marked, PHP Markdown, and others +followed `Markdown.pl`'s behavior more closely.) + +Unfortunately, given the divergences between implementations, there +is no way to give a spec for list items that will be guaranteed not +to break any existing documents. However, the spec given here should +correctly handle lists formatted with either the four-space rule or +the more forgiving `Markdown.pl` behavior, provided they are laid out +in a way that is natural for a human to read. + +The strategy here is to let the width and indentation of the list marker +determine the indentation necessary for blocks to fall under the list +item, rather than having a fixed and arbitrary number. The writer can +think of the body of the list item as a unit which gets indented to the +right enough to fit the list marker (and any indentation on the list +marker). (The laziness rule, #5, then allows continuation lines to be +unindented if needed.) + +This rule is superior, we claim, to any rule requiring a fixed level of +indentation from the margin. The four-space rule is clear but +unnatural. It is quite unintuitive that + +``` markdown +- foo + + bar + + - baz +``` + +should be parsed as two lists with an intervening paragraph, + +``` html +
    +
  • foo
  • +
+

bar

+
    +
  • baz
  • +
+``` + +as the four-space rule demands, rather than a single list, + +``` html +
    +
  • +

    foo

    +

    bar

    +
      +
    • baz
    • +
    +
  • +
+``` + +The choice of four spaces is arbitrary. It can be learned, but it is +not likely to be guessed, and it trips up beginners regularly. + +Would it help to adopt a two-space rule? The problem is that such +a rule, together with the rule allowing 1--3 spaces indentation of the +initial list marker, allows text that is indented *less than* the +original list marker to be included in the list item. For example, +`Markdown.pl` parses + +``` markdown + - one + + two +``` + +as a single list item, with `two` a continuation paragraph: + +``` html +
    +
  • +

    one

    +

    two

    +
  • +
+``` + +and similarly + +``` markdown +> - one +> +> two +``` + +as + +``` html +
+
    +
  • +

    one

    +

    two

    +
  • +
+
+``` + +This is extremely unintuitive. + +Rather than requiring a fixed indent from the margin, we could require +a fixed indent (say, two spaces, or even one space) from the list marker (which +may itself be indented). This proposal would remove the last anomaly +discussed. Unlike the spec presented above, it would count the following +as a list item with a subparagraph, even though the paragraph `bar` +is not indented as far as the first paragraph `foo`: + +``` markdown + 10. foo + + bar +``` + +Arguably this text does read like a list item with `bar` as a subparagraph, +which may count in favor of the proposal. However, on this proposal indented +code would have to be indented six spaces after the list marker. And this +would break a lot of existing Markdown, which has the pattern: + +``` markdown +1. foo + + indented code +``` + +where the code is indented eight spaces. The spec above, by contrast, will +parse this text as expected, since the code block's indentation is measured +from the beginning of `foo`. + +The one case that needs special treatment is a list item that *starts* +with indented code. How much indentation is required in that case, since +we don't have a "first paragraph" to measure from? Rule #2 simply stipulates +that in such cases, we require one space indentation from the list marker +(and then the normal four spaces for the indented code). This will match the +four-space rule in cases where the list marker plus its initial indentation +takes four spaces (a common case), but diverge in other cases. + +## Lists + +A [list](@) is a sequence of one or more +list items [of the same type]. The list items +may be separated by any number of blank lines. + +Two list items are [of the same type](@) +if they begin with a [list marker] of the same type. 
+Two list markers are of the +same type if (a) they are bullet list markers using the same character +(`-`, `+`, or `*`) or (b) they are ordered list numbers with the same +delimiter (either `.` or `)`). + +A list is an [ordered list](@) +if its constituent list items begin with +[ordered list markers], and a +[bullet list](@) if its constituent list +items begin with [bullet list markers]. + +The [start number](@) +of an [ordered list] is determined by the list number of +its initial list item. The numbers of subsequent list items are +disregarded. + +A list is [loose](@) if any of its constituent +list items are separated by blank lines, or if any of its constituent +list items directly contain two block-level elements with a blank line +between them. Otherwise a list is [tight](@). +(The difference in HTML output is that paragraphs in a loose list are +wrapped in `<p>` tags, while paragraphs in a tight list are not.) + +Changing the bullet or ordered list delimiter starts a new list: + +```````````````````````````````` example +- foo +- bar ++ baz +. +

    +
  • foo
  • +
  • bar
  • +
+
    +
  • baz
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. foo +2. bar +3) baz +. +
    +
  1. foo
  2. +
  3. bar
  4. +
+
    +
  1. baz
  2. +
+```````````````````````````````` + + +In CommonMark, a list can interrupt a paragraph. That is, +no blank line is needed to separate a paragraph from a following +list: + +```````````````````````````````` example +Foo +- bar +- baz +. +

Foo

+
    +
  • bar
  • +
  • baz
  • +
+```````````````````````````````` + +`Markdown.pl` does not allow this, through fear of triggering a list +via a numeral in a hard-wrapped line: + +``` markdown +The number of windows in my house is +14. The number of doors is 6. +``` + +Oddly, though, `Markdown.pl` *does* allow a blockquote to +interrupt a paragraph, even though the same considerations might +apply. + +In CommonMark, we do allow lists to interrupt paragraphs, for +two reasons. First, it is natural and not uncommon for people +to start lists without blank lines: + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +Second, we are attracted to a + +> [principle of uniformity](@): +> if a chunk of text has a certain +> meaning, it will continue to have the same meaning when put into a +> container block (such as a list item or blockquote). + +(Indeed, the spec for [list items] and [block quotes] presupposes +this principle.) This principle implies that if + +``` markdown + * I need to buy + - new shoes + - a coat + - a plane ticket +``` + +is a list item containing a paragraph followed by a nested sublist, +as all Markdown implementations agree it is (though the paragraph +may be rendered without `

` tags, since the list is "tight"), +then + +``` markdown +I need to buy +- new shoes +- a coat +- a plane ticket +``` + +by itself should be a paragraph followed by a nested sublist. + +Since it is well established Markdown practice to allow lists to +interrupt paragraphs inside list items, the [principle of +uniformity] requires us to allow this outside list items as +well. ([reStructuredText](http://docutils.sourceforge.net/rst.html) +takes a different approach, requiring blank lines before lists +even inside other list items.) + +In order to solve of unwanted lists in paragraphs with +hard-wrapped numerals, we allow only lists starting with `1` to +interrupt paragraphs. Thus, + +```````````````````````````````` example +The number of windows in my house is +14. The number of doors is 6. +. +

The number of windows in my house is +14. The number of doors is 6.

+```````````````````````````````` + +We may still get an unintended result in cases like + +```````````````````````````````` example +The number of windows in my house is +1. The number of doors is 6. +. +

The number of windows in my house is

+
    +
  1. The number of doors is 6.
  2. +
+```````````````````````````````` + +but this rule should prevent most spurious list captures. + +There can be any number of blank lines between items: + +```````````````````````````````` example +- foo + +- bar + + +- baz +. +
    +
  • +

    foo

    +
  • +
  • +

    bar

    +
  • +
  • +

    baz

    +
  • +
+```````````````````````````````` + +```````````````````````````````` example +- foo + - bar + - baz + + + bim +. +
    +
  • foo +
      +
    • bar +
        +
      • +

        baz

        +

        bim

        +
      • +
      +
    • +
    +
  • +
+```````````````````````````````` + + +To separate consecutive lists of the same type, or to separate a +list from an indented code block that would otherwise be parsed +as a subparagraph of the final list item, you can insert a blank HTML +comment: + +```````````````````````````````` example +- foo +- bar + + + +- baz +- bim +. +
    +
  • foo
  • +
  • bar
  • +
+ +
    +
  • baz
  • +
  • bim
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- foo + + notcode + +- foo + + + + code +. +
    +
  • +

    foo

    +

    notcode

    +
  • +
  • +

    foo

    +
  • +
+ +
code
+
+```````````````````````````````` + + +List items need not be indented to the same level. The following +list items will be treated as items at the same list level, +since none is indented enough to belong to the previous list +item: + +```````````````````````````````` example +- a + - b + - c + - d + - e + - f + - g + - h +- i +. +
    +
  • a
  • +
  • b
  • +
  • c
  • +
  • d
  • +
  • e
  • +
  • f
  • +
  • g
  • +
  • h
  • +
  • i
  • +
+```````````````````````````````` + + +```````````````````````````````` example +1. a + + 2. b + + 3. c +. +
    +
  1. +

    a

    +
  2. +
  3. +

    b

    +
  4. +
  5. +

    c

    +
  6. +
+```````````````````````````````` + + +This is a loose list, because there is a blank line between +two of the list items: + +```````````````````````````````` example +- a +- b + +- c +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +So is this, with an empty second item: + +```````````````````````````````` example +* a +* + +* c +. +
    +
  • +

    a

    +
  • +
  • +
  • +

    c

    +
  • +
+```````````````````````````````` + + +These are loose lists, even though there is no space between the items, +because one of the items directly contains two block-level elements +with a blank line between them: + +```````````````````````````````` example +- a +- b + + c +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +

    c

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a +- b + + [ref]: /url +- d +. +
    +
  • +

    a

    +
  • +
  • +

    b

    +
  • +
  • +

    d

    +
  • +
+```````````````````````````````` + + +This is a tight list, because the blank lines are in a code block: + +```````````````````````````````` example +- a +- ``` + b + + + ``` +- c +. +
    +
  • a
  • +
  • +
    b
    +
    +
    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is between two +paragraphs of a sublist. So the sublist is loose while +the outer list is tight: + +```````````````````````````````` example +- a + - b + + c +- d +. +
    +
  • a +
      +
    • +

      b

      +

      c

      +
    • +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +This is a tight list, because the blank line is inside the +block quote: + +```````````````````````````````` example +* a + > b + > +* c +. +
    +
  • a +
    +

    b

    +
    +
  • +
  • c
  • +
+```````````````````````````````` + + +This list is tight, because the consecutive block elements +are not separated by blank lines: + +```````````````````````````````` example +- a + > b + ``` + c + ``` +- d +. +
    +
  • a +
    +

    b

    +
    +
    c
    +
    +
  • +
  • d
  • +
+```````````````````````````````` + + +A single-paragraph list is tight: + +```````````````````````````````` example +- a +. +
    +
  • a
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b +. +
    +
  • a +
      +
    • b
    • +
    +
  • +
+```````````````````````````````` + + +This list is loose, because of the blank line between the +two block elements in the list item: + +```````````````````````````````` example +1. ``` + foo + ``` + + bar +. +
    +
  1. +
    foo
    +
    +

    bar

    +
  2. +
+```````````````````````````````` + + +Here the outer list is loose, the inner list tight: + +```````````````````````````````` example +* foo + * bar + + baz +. +
    +
  • +

    foo

    +
      +
    • bar
    • +
    +

    baz

    +
  • +
+```````````````````````````````` + + +```````````````````````````````` example +- a + - b + - c + +- d + - e + - f +. +
    +
  • +

    a

    +
      +
    • b
    • +
    • c
    • +
    +
  • +
  • +

    d

    +
      +
    • e
    • +
    • f
    • +
    +
  • +
+```````````````````````````````` + + +# Inlines + +Inlines are parsed sequentially from the beginning of the character +stream to the end (left to right, in left-to-right languages). +Thus, for example, in + +```````````````````````````````` example +`hi`lo` +. +

hilo`

+```````````````````````````````` + + +`hi` is parsed as code, leaving the backtick at the end as a literal +backtick. + +## Backslash escapes + +Any ASCII punctuation character may be backslash-escaped: + +```````````````````````````````` example +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ +. +

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~

+```````````````````````````````` + + +Backslashes before other characters are treated as literal +backslashes: + +```````````````````````````````` example +\→\A\a\ \3\φ\« +. +

\→\A\a\ \3\φ\«

+```````````````````````````````` + + +Escaped characters are treated as regular characters and do +not have their usual Markdown meanings: + +```````````````````````````````` example +\*not emphasized* +\<br/> not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a heading +\[foo]: /url "not a reference" +. +

*not emphasized* +<br/> not a tag +[not a link](/foo) +`not code` +1. not a list +* not a list +# not a heading +[foo]: /url "not a reference"

+```````````````````````````````` + + +If a backslash is itself escaped, the following character is not: + +```````````````````````````````` example +\\*emphasis* +. +

\emphasis

+```````````````````````````````` + + +A backslash at the end of the line is a [hard line break]: + +```````````````````````````````` example +foo\ +bar +. +

foo
+bar

+```````````````````````````````` + + +Backslash escapes do not work in code blocks, code spans, autolinks, or +raw HTML: + +```````````````````````````````` example +`` \[\` `` +. +

\[\`

+```````````````````````````````` + + +```````````````````````````````` example + \[\] +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example +~~~ +\[\] +~~~ +. +
\[\]
+
+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://example.com?find=\*

+```````````````````````````````` + + +```````````````````````````````` example + +. + +```````````````````````````````` + + +But they work in all other contexts, including URLs and link titles, +link references, and [info strings] in [fenced code blocks]: + +```````````````````````````````` example +[foo](/bar\* "ti\*tle") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /bar\* "ti\*tle" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` foo\+bar +foo +``` +. +
foo
+
+```````````````````````````````` + + + +## Entity and numeric character references + +All valid HTML entity references and numeric character +references, except those occurring in code blocks and code spans, +are recognized as such and treated as equivalent to the +corresponding Unicode characters. Conforming CommonMark parsers +need not store information about whether a particular character +was represented in the source using a Unicode character or +an entity reference. + +[Entity references](@) consist of `&` + any of the valid +HTML5 entity names + `;`. The +document <https://html.spec.whatwg.org/multipage/entities.json> +is used as an authoritative source for the valid entity +references and their corresponding code points. + +```````````````````````````````` example +&nbsp; &amp; &copy; &AElig; &Dcaron; +&frac34; &HilbertSpace; &DifferentialD; +&ClockwiseContourIntegral; &ngE; +. +

  & © Æ Ď +¾ ℋ ⅆ +∲ ≧̸

+```````````````````````````````` + + +[Decimal numeric character +references](@) +consist of `&#` + a string of 1--8 arabic digits + `;`. A +numeric character reference is parsed as the corresponding +Unicode character. Invalid Unicode code points will be replaced by +the REPLACEMENT CHARACTER (`U+FFFD`). For security reasons, +the code point `U+0000` will also be replaced by `U+FFFD`. + +```````````````````````````````` example +&#35; &#1234; &#992; &#98765432; &#0; +. +

# Ӓ Ϡ � �

+```````````````````````````````` + + +[Hexadecimal numeric character +references](@) consist of `&#` + +either `X` or `x` + a string of 1--8 hexadecimal digits + `;`. +They too are parsed as the corresponding Unicode character (this +time specified with a hexadecimal numeral instead of decimal). + +```````````````````````````````` example +&#X22; &#XD06; &#xcab; +. +

" ആ ಫ

+```````````````````````````````` + + +Here are some nonentities: + +```````````````````````````````` example +&nbsp &x; &#; &#x; +&ThisIsNotDefined; &hi?; +. +

&nbsp &x; &#; &#x; +&ThisIsNotDefined; &hi?;

+```````````````````````````````` + + +Although HTML5 does accept some entity references +without a trailing semicolon (such as `&copy`), these are not +recognized here, because it makes the grammar too ambiguous: + +```````````````````````````````` example +&copy +. +

&copy

+```````````````````````````````` + + +Strings that are not on the list of HTML5 named entities are not +recognized as entity references either: + +```````````````````````````````` example +&MadeUpEntity; +. +

&MadeUpEntity;

+```````````````````````````````` + + +```````````````````````````````` example +[foo](/f&ouml;&ouml; "f&ouml;&ouml;") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo] + +[foo]: /f&ouml;&ouml; "f&ouml;&ouml;" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +``` f&ouml;&ouml; +foo +``` +. +
foo
+
+```````````````````````````````` + + +Entity and numeric character references are treated as literal +text in code spans and code blocks: + +```````````````````````````````` example +`f&ouml;&ouml;` +. +

f&ouml;&ouml;

+```````````````````````````````` + + +```````````````````````````````` example +    f&ouml;f&ouml; +. +
f&ouml;f&ouml;
+
+```````````````````````````````` + + +## Code spans + +A [backtick string](@) +is a string of one or more backtick characters (`` ` ``) that is neither +preceded nor followed by a backtick. + +A [code span](@) begins with a backtick string and ends with +a backtick string of equal length. The contents of the code span are +the characters between the two backtick strings, with leading and +trailing spaces and [line endings] removed, and +[whitespace] collapsed to single spaces. + +This is a simple code span: + +```````````````````````````````` example +`foo` +. +

foo

+```````````````````````````````` + + +Here two backticks are used, because the code contains a backtick. +This example also illustrates stripping of leading and trailing spaces: + +```````````````````````````````` example +`` foo ` bar `` +. +

foo ` bar

+```````````````````````````````` + + +This example shows the motivation for stripping leading and trailing +spaces: + +```````````````````````````````` example +` `` ` +. +

``

+```````````````````````````````` + + +[Line endings] are treated like spaces: + +```````````````````````````````` example +`` +foo +`` +. +

foo

+```````````````````````````````` + + +Interior spaces and [line endings] are collapsed into +single spaces, just as they would be by a browser: + +```````````````````````````````` example +`foo bar + baz` +. +

foo bar baz

+```````````````````````````````` + + +Not all [Unicode whitespace] (for instance, non-breaking space) is +collapsed, however: + +```````````````````````````````` example +`a  b` +. +

a  b

+```````````````````````````````` + + +Q: Why not just leave the spaces, since browsers will collapse them +anyway? A: Because we might be targeting a non-HTML format, and we +shouldn't rely on HTML-specific rendering assumptions. + +(Existing implementations differ in their treatment of internal +spaces and [line endings]. Some, including `Markdown.pl` and +`showdown`, convert an internal [line ending] into a +`<br />` tag. But this makes things difficult for those who like to +hard-wrap their paragraphs, since a line break in the midst of a code +span will cause an unintended line break in the output. Others just +leave internal spaces as they are, which is fine if only HTML is being +targeted.) + +```````````````````````````````` example +`foo `` bar` +. +

foo `` bar

+```````````````````````````````` + + +Note that backslash escapes do not work in code spans. All backslashes +are treated literally: + +```````````````````````````````` example +`foo\`bar` +. +

foo\bar`

+```````````````````````````````` + + +Backslash escapes are never needed, because one can always choose a +string of *n* backtick characters as delimiters, where the code does +not contain any strings of exactly *n* backtick characters. + +Code span backticks have higher precedence than any other inline +constructs except HTML tags and autolinks. Thus, for example, this is +not parsed as emphasized text, since the second `*` is part of a code +span: + +```````````````````````````````` example +*foo`*` +. +

*foo*

+```````````````````````````````` + + +And this is not parsed as a link: + +```````````````````````````````` example +[not a `link](/foo`) +. +

[not a link](/foo)

+```````````````````````````````` + + +Code spans, HTML tags, and autolinks have the same precedence. +Thus, this is code: + +```````````````````````````````` example +`<a href="`">` +. +

<a href="">`

+```````````````````````````````` + + +But this is an HTML tag: + +```````````````````````````````` example +<a href="`">` +. +

`

+```````````````````````````````` + + +And this is code: + +```````````````````````````````` example +`<http://foo.bar.`baz>` +. +

<http://foo.bar.baz>`

+```````````````````````````````` + + +But this is an autolink: + +```````````````````````````````` example +<http://foo.bar.`baz>` +. +

http://foo.bar.`baz`

+```````````````````````````````` + + +When a backtick string is not closed by a matching backtick string, +we just have literal backticks: + +```````````````````````````````` example +```foo`` +. +

```foo``

+```````````````````````````````` + + +```````````````````````````````` example +`foo +. +

`foo

+```````````````````````````````` + + +## Emphasis and strong emphasis + +John Gruber's original [Markdown syntax +description](http://daringfireball.net/projects/markdown/syntax#em) says: + +> Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +> emphasis. Text wrapped with one `*` or `_` will be wrapped with an HTML +> `<em>` tag; double `*`'s or `_`'s will be wrapped with an HTML `<strong>` +> tag. + +This is enough for most users, but these rules leave much undecided, +especially when it comes to nested emphasis. The original +`Markdown.pl` test suite makes it clear that triple `***` and +`___` delimiters can be used for strong emphasis, and most +implementations have also allowed the following patterns: + +``` markdown +***strong emph*** +***strong** in emph* +***emph* in strong** +**in strong *emph*** +*in emph **strong*** +``` + +The following patterns are less widely supported, but the intent +is clear and they are useful (especially in contexts like bibliography +entries): + +``` markdown +*emph *with emph* in it* +**strong **with strong** in it** +``` + +Many implementations have also restricted intraword emphasis to +the `*` forms, to avoid unwanted emphasis in words containing +internal underscores. (It is best practice to put these in code +spans, but users often do not.) + +``` markdown +internal emphasis: foo*bar*baz +no emphasis: foo_bar_baz +``` + +The rules given below capture all of these patterns, while allowing +for efficient parsing strategies that do not backtrack. + +First, some definitions. A [delimiter run](@) is either +a sequence of one or more `*` characters that is not preceded or +followed by a `*` character, or a sequence of one or more `_` +characters that is not preceded or followed by a `_` character. 
+ +A [left-flanking delimiter run](@) is +a [delimiter run] that is (a) not followed by [Unicode whitespace], +and (b) either not followed by a [punctuation character], or +preceded by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +A [right-flanking delimiter run](@) is +a [delimiter run] that is (a) not preceded by [Unicode whitespace], +and (b) either not preceded by a [punctuation character], or +followed by [Unicode whitespace] or a [punctuation character]. +For purposes of this definition, the beginning and the end of +the line count as Unicode whitespace. + +Here are some examples of delimiter runs. + + - left-flanking but not right-flanking: + + ``` + ***abc + _abc + **"abc" + _"abc" + ``` + + - right-flanking but not left-flanking: + + ``` + abc*** + abc_ + "abc"** + "abc"_ + ``` + + - Both left and right-flanking: + + ``` + abc***def + "abc"_"def" + ``` + + - Neither left nor right-flanking: + + ``` + abc *** def + a _ b + ``` + +(The idea of distinguishing left-flanking and right-flanking +delimiter runs based on the character before and the character +after comes from Roopesh Chander's +[vfmd](http://www.vfmd.org/vfmd-spec/specification/#procedure-for-identifying-emphasis-tags). +vfmd uses the terminology "emphasis indicator string" instead of "delimiter +run," and its rules for distinguishing left- and right-flanking runs +are a bit more complex than the ones given here.) + +The following rules define emphasis and strong emphasis: + +1. A single `*` character [can open emphasis](@) + iff (if and only if) it is part of a [left-flanking delimiter run]. + +2. A single `_` character [can open emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +3. 
A single `*` character [can close emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +4. A single `_` character [can close emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +5. A double `**` [can open strong emphasis](@) + iff it is part of a [left-flanking delimiter run]. + +6. A double `__` [can open strong emphasis] iff + it is part of a [left-flanking delimiter run] + and either (a) not part of a [right-flanking delimiter run] + or (b) part of a [right-flanking delimiter run] + preceded by punctuation. + +7. A double `**` [can close strong emphasis](@) + iff it is part of a [right-flanking delimiter run]. + +8. A double `__` [can close strong emphasis] iff + it is part of a [right-flanking delimiter run] + and either (a) not part of a [left-flanking delimiter run] + or (b) part of a [left-flanking delimiter run] + followed by punctuation. + +9. Emphasis begins with a delimiter that [can open emphasis] and ends + with a delimiter that [can close emphasis], and that uses the same + character (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both + open and close emphasis, then the sum of the lengths of the + delimiter runs containing the opening and closing delimiters + must not be a multiple of 3. + +10. Strong emphasis begins with a delimiter that + [can open strong emphasis] and ends with a delimiter that + [can close strong emphasis], and that uses the same character + (`_` or `*`) as the opening delimiter. The + opening and closing delimiters must belong to separate + [delimiter runs]. If one of the delimiters can both open + and close strong emphasis, then the sum of the lengths of + the delimiter runs containing the opening and closing + delimiters must not be a multiple of 3. + +11. 
A literal `*` character cannot occur at the beginning or end of + `*`-delimited emphasis or `**`-delimited strong emphasis, unless it + is backslash-escaped. + +12. A literal `_` character cannot occur at the beginning or end of + `_`-delimited emphasis or `__`-delimited strong emphasis, unless it + is backslash-escaped. + +Where rules 1--12 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +13. The number of nestings should be minimized. Thus, for example, + an interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +14. An interpretation `<strong><em>...</em></strong>` is always + preferred to `<em><strong>...</strong></em>`. + +15. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first takes precedence. Thus, for example, + `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather + than `*foo <em>bar* baz</em>`. + +16. When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) takes precedence. Thus, for example, + `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` + rather than `<strong>foo **bar baz</strong>`. + +17. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins. Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. + +These rules can be illustrated through a series of examples. + +Rule 1: + +```````````````````````````````` example +*foo bar* +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is followed by +whitespace, and hence not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a * foo bar* +. +

a * foo bar*

+```````````````````````````````` + + +This is not emphasis, because the opening `*` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a*"foo"* +. +

a*"foo"*

+```````````````````````````````` + + +Unicode nonbreaking spaces count as whitespace, too: + +```````````````````````````````` example +* a * +. +

* a *

+```````````````````````````````` + + +Intraword emphasis with `*` is permitted: + +```````````````````````````````` example +foo*bar* +. +

foobar

+```````````````````````````````` + + +```````````````````````````````` example +5*6*78 +. +

5678

+```````````````````````````````` + + +Rule 2: + +```````````````````````````````` example +_foo bar_ +. +

foo bar

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is followed by +whitespace: + +```````````````````````````````` example +_ foo bar_ +. +

_ foo bar_

+```````````````````````````````` + + +This is not emphasis, because the opening `_` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a_"foo"_ +. +

a_"foo"_

+```````````````````````````````` + + +Emphasis with `_` is not allowed inside words: + +```````````````````````````````` example +foo_bar_ +. +

foo_bar_

+```````````````````````````````` + + +```````````````````````````````` example +5_6_78 +. +

5_6_78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням_стремятся_ +. +

пристаням_стремятся_

+```````````````````````````````` + + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +```````````````````````````````` example +aa_"bb"_cc +. +

aa_"bb"_cc

+```````````````````````````````` + + +This is emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-_(bar)_ +. +

foo-(bar)

+```````````````````````````````` + + +Rule 3: + +This is not emphasis, because the closing delimiter does +not match the opening delimiter: + +```````````````````````````````` example +_foo* +. +

_foo*

+```````````````````````````````` + + +This is not emphasis, because the closing `*` is preceded by +whitespace: + +```````````````````````````````` example +*foo bar * +. +

*foo bar *

+```````````````````````````````` + + +A newline also counts as whitespace: + +```````````````````````````````` example +*foo bar +* +. +

*foo bar +*

+```````````````````````````````` + + +This is not emphasis, because the second `*` is +preceded by punctuation and followed by an alphanumeric +(hence it is not part of a [right-flanking delimiter run]): + +```````````````````````````````` example +*(*foo) +. +

*(*foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +*(*foo*)* +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis with `*` is allowed: + +```````````````````````````````` example +*foo*bar +. +

foobar

+```````````````````````````````` + + + +Rule 4: + +This is not emphasis, because the closing `_` is preceded by +whitespace: + +```````````````````````````````` example +_foo bar _ +. +

_foo bar _

+```````````````````````````````` + + +This is not emphasis, because the second `_` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +_(_foo) +. +

_(_foo)

+```````````````````````````````` + + +This is emphasis within emphasis: + +```````````````````````````````` example +_(_foo_)_ +. +

(foo)

+```````````````````````````````` + + +Intraword emphasis is disallowed for `_`: + +```````````````````````````````` example +_foo_bar +. +

_foo_bar

+```````````````````````````````` + + +```````````````````````````````` example +_пристаням_стремятся +. +

_пристаням_стремятся

+```````````````````````````````` + + +```````````````````````````````` example +_foo_bar_baz_ +. +

foo_bar_baz

+```````````````````````````````` + + +This is emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +_(bar)_. +. +

(bar).

+```````````````````````````````` + + +Rule 5: + +```````````````````````````````` example +**foo bar** +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +** foo bar** +. +

** foo bar**

+```````````````````````````````` + + +This is not strong emphasis, because the opening `**` is preceded +by an alphanumeric and followed by punctuation, and hence +not part of a [left-flanking delimiter run]: + +```````````````````````````````` example +a**"foo"** +. +

a**"foo"**

+```````````````````````````````` + + +Intraword strong emphasis with `**` is permitted: + +```````````````````````````````` example +foo**bar** +. +

foobar

+```````````````````````````````` + + +Rule 6: + +```````````````````````````````` example +__foo bar__ +. +

foo bar

+```````````````````````````````` + + +This is not strong emphasis, because the opening delimiter is +followed by whitespace: + +```````````````````````````````` example +__ foo bar__ +. +

__ foo bar__

+```````````````````````````````` + + +A newline counts as whitespace: +```````````````````````````````` example +__ +foo bar__ +. +

__ +foo bar__

+```````````````````````````````` + + +This is not strong emphasis, because the opening `__` is preceded +by an alphanumeric and followed by punctuation: + +```````````````````````````````` example +a__"foo"__ +. +

a__"foo"__

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +foo__bar__ +. +

foo__bar__

+```````````````````````````````` + + +```````````````````````````````` example +5__6__78 +. +

5__6__78

+```````````````````````````````` + + +```````````````````````````````` example +пристаням__стремятся__ +. +

пристаням__стремятся__

+```````````````````````````````` + + +```````````````````````````````` example +__foo, __bar__, baz__ +. +

foo, bar, baz

+```````````````````````````````` + + +This is strong emphasis, even though the opening delimiter is +both left- and right-flanking, because it is preceded by +punctuation: + +```````````````````````````````` example +foo-__(bar)__ +. +

foo-(bar)

+```````````````````````````````` + + + +Rule 7: + +This is not strong emphasis, because the closing delimiter is preceded +by whitespace: + +```````````````````````````````` example +**foo bar ** +. +

**foo bar **

+```````````````````````````````` + + +(Nor can it be interpreted as an emphasized `*foo bar *`, because of +Rule 11.) + +This is not strong emphasis, because the second `**` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +**(**foo) +. +

**(**foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with these examples: + +```````````````````````````````` example +*(**foo**)* +. +

(foo)

+```````````````````````````````` + + +```````````````````````````````` example +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** +. +

Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

+```````````````````````````````` + + +```````````````````````````````` example +**foo "*bar*" foo** +. +

foo "bar" foo

+```````````````````````````````` + + +Intraword emphasis: + +```````````````````````````````` example +**foo**bar +. +

foobar

+```````````````````````````````` + + +Rule 8: + +This is not strong emphasis, because the closing delimiter is +preceded by whitespace: + +```````````````````````````````` example +__foo bar __ +. +

__foo bar __

+```````````````````````````````` + + +This is not strong emphasis, because the second `__` is +preceded by punctuation and followed by an alphanumeric: + +```````````````````````````````` example +__(__foo) +. +

__(__foo)

+```````````````````````````````` + + +The point of this restriction is more easily appreciated +with this example: + +```````````````````````````````` example +_(__foo__)_ +. +

(foo)

+```````````````````````````````` + + +Intraword strong emphasis is forbidden with `__`: + +```````````````````````````````` example +__foo__bar +. +

__foo__bar

+```````````````````````````````` + + +```````````````````````````````` example +__пристаням__стремятся +. +

__пристаням__стремятся

+```````````````````````````````` + + +```````````````````````````````` example +__foo__bar__baz__ +. +

foo__bar__baz

+```````````````````````````````` + + +This is strong emphasis, even though the closing delimiter is +both left- and right-flanking, because it is followed by +punctuation: + +```````````````````````````````` example +__(bar)__. +. +

(bar).

+```````````````````````````````` + + +Rule 9: + +Any nonempty sequence of inline elements can be the contents of an +emphasized span. + +```````````````````````````````` example +*foo [bar](/url)* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo +bar* +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside emphasis: + +```````````````````````````````` example +_foo __bar__ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +_foo _bar_ baz_ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ bar_ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar** baz* +. +

foo bar baz

+```````````````````````````````` + +```````````````````````````````` example +*foo**bar**baz* +. +

foobarbaz

+```````````````````````````````` + +Note that in the preceding case, the interpretation + +``` markdown +

foobarbaz

+``` + + +is precluded by the condition that a delimiter that +can both open and close (like the `*` after `foo`) +cannot form emphasis if the sum of the lengths of +the delimiter runs containing the opening and +closing delimiters is a multiple of 3. + +The same condition ensures that the following +cases are all strong emphasis nested inside +emphasis, even when the interior spaces are +omitted: + + +```````````````````````````````` example +***foo** bar* +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo **bar*** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo**bar*** +. +

foobar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +*foo **bar *baz* bim** bop* +. +

foo bar baz bim bop

+```````````````````````````````` + + +```````````````````````````````` example +*foo [*bar*](/url)* +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +** is not an empty emphasis +. +

** is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +**** is not an empty strong emphasis +. +

**** is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 10: + +Any nonempty sequence of inline elements can be the contents of a +strongly emphasized span. + +```````````````````````````````` example +**foo [bar](/url)** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo +bar** +. +

foo +bar

+```````````````````````````````` + + +In particular, emphasis and strong emphasis can be nested +inside strong emphasis: + +```````````````````````````````` example +__foo _bar_ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +__foo __bar__ baz__ +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +____foo__ bar__ +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo **bar**** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar* baz** +. +

foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +**foo*bar*baz** +. +

foobarbaz

+```````````````````````````````` + + +```````````````````````````````` example +***foo* bar** +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +**foo *bar*** +. +

foo bar

+```````````````````````````````` + + +Indefinite levels of nesting are possible: + +```````````````````````````````` example +**foo *bar **baz** +bim* bop** +. +

foo bar baz +bim bop

+```````````````````````````````` + + +```````````````````````````````` example +**foo [*bar*](/url)** +. +

foo bar

+```````````````````````````````` + + +There can be no empty emphasis or strong emphasis: + +```````````````````````````````` example +__ is not an empty emphasis +. +

__ is not an empty emphasis

+```````````````````````````````` + + +```````````````````````````````` example +____ is not an empty strong emphasis +. +

____ is not an empty strong emphasis

+```````````````````````````````` + + + +Rule 11: + +```````````````````````````````` example +foo *** +. +

foo ***

+```````````````````````````````` + + +```````````````````````````````` example +foo *\** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo *_* +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo ***** +. +

foo *****

+```````````````````````````````` + + +```````````````````````````````` example +foo **\*** +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo **_** +. +

foo _

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 11 determines +that the excess literal `*` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +**foo* +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +*foo** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +***foo** +. +

*foo

+```````````````````````````````` + + +```````````````````````````````` example +****foo* +. +

***foo

+```````````````````````````````` + + +```````````````````````````````` example +**foo*** +. +

foo*

+```````````````````````````````` + + +```````````````````````````````` example +*foo**** +. +

foo***

+```````````````````````````````` + + + +Rule 12: + +```````````````````````````````` example +foo ___ +. +

foo ___

+```````````````````````````````` + + +```````````````````````````````` example +foo _\__ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo _*_ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +foo _____ +. +

foo _____

+```````````````````````````````` + + +```````````````````````````````` example +foo __\___ +. +

foo _

+```````````````````````````````` + + +```````````````````````````````` example +foo __*__ +. +

foo *

+```````````````````````````````` + + +```````````````````````````````` example +__foo_ +. +

_foo

+```````````````````````````````` + + +Note that when delimiters do not match evenly, Rule 12 determines +that the excess literal `_` characters will appear outside of the +emphasis, rather than inside it: + +```````````````````````````````` example +_foo__ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +___foo__ +. +

_foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo_ +. +

___foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo___ +. +

foo_

+```````````````````````````````` + + +```````````````````````````````` example +_foo____ +. +

foo___

+```````````````````````````````` + + +Rule 13 implies that if you want emphasis nested directly inside +emphasis, you must use different delimiters: + +```````````````````````````````` example +**foo** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +*_foo_* +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +__foo__ +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_*foo*_ +. +

foo

+```````````````````````````````` + + +However, strong emphasis within strong emphasis is possible without +switching delimiters: + +```````````````````````````````` example +****foo**** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +____foo____ +. +

foo

+```````````````````````````````` + + + +Rule 13 can be applied to arbitrarily long sequences of +delimiters: + +```````````````````````````````` example +******foo****** +. +

foo

+```````````````````````````````` + + +Rule 14: + +```````````````````````````````` example +***foo*** +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +_____foo_____ +. +

foo

+```````````````````````````````` + + +Rule 15: + +```````````````````````````````` example +*foo _bar* baz_ +. +

foo _bar baz_

+```````````````````````````````` + + +```````````````````````````````` example +*foo __bar *baz bim__ bam* +. +

foo bar *baz bim bam

+```````````````````````````````` + + +Rule 16: + +```````````````````````````````` example +**foo **bar baz** +. +

**foo bar baz

+```````````````````````````````` + + +```````````````````````````````` example +*foo *bar baz* +. +

*foo bar baz

+```````````````````````````````` + + +Rule 17: + +```````````````````````````````` example +*[bar*](/url) +. +

*bar*

+```````````````````````````````` + + +```````````````````````````````` example +_foo [bar_](/url) +. +

_foo bar_

+```````````````````````````````` + + +```````````````````````````````` example +*<img src="foo" title="*"/> +. +

*

+```````````````````````````````` + + +```````````````````````````````` example +**<a href="**"> +. +

**

+```````````````````````````````` + + +```````````````````````````````` example +__<a href="__"> +. +

__

+```````````````````````````````` + + +```````````````````````````````` example +*a `*`* +. +

a *

+```````````````````````````````` + + +```````````````````````````````` example +_a `_`_ +. +

a _

+```````````````````````````````` + + +```````````````````````````````` example +**a<http://foo.bar/?q=**> +. +

**ahttp://foo.bar/?q=**

+```````````````````````````````` + + +```````````````````````````````` example +__a<http://foo.bar/?q=__> +. +

__ahttp://foo.bar/?q=__

+```````````````````````````````` + + + +## Links + +A link contains [link text] (the visible text), a [link destination] +(the URI that is the link destination), and optionally a [link title]. +There are two basic kinds of links in Markdown. In [inline links] the +destination and title are given immediately after the link text. In +[reference links] the destination and title are defined elsewhere in +the document. + +A [link text](@) consists of a sequence of zero or more +inline elements enclosed by square brackets (`[` and `]`). The +following rules apply: + +- Links may not contain other links, at any level of nesting. If + multiple otherwise valid link definitions appear nested inside each + other, the inner-most definition is used. + +- Brackets are allowed in the [link text] only if (a) they + are backslash-escaped or (b) they appear as a matched pair of brackets, + with an open bracket `[`, a sequence of zero or more inlines, and + a close bracket `]`. + +- Backtick [code spans], [autolinks], and raw [HTML tags] bind more tightly + than the brackets in link text. Thus, for example, + `` [foo`]` `` could not be a link text, since the second `]` + is part of a code span. + +- The brackets in link text bind more tightly than markers for + [emphasis and strong emphasis]. Thus, for example, `*[foo*](url)` is a link. + +A [link destination](@) consists of either + +- a sequence of zero or more characters between an opening `<` and a + closing `>` that contains no spaces, line breaks, or unescaped + `<` or `>` characters, or + +- a nonempty sequence of characters that does not include + ASCII space or control characters, and includes parentheses + only if (a) they are backslash-escaped or (b) they are part of + a balanced pair of unescaped parentheses. 
+ +A [link title](@) consists of either + +- a sequence of zero or more characters between straight double-quote + characters (`"`), including a `"` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between straight single-quote + characters (`'`), including a `'` character only if it is + backslash-escaped, or + +- a sequence of zero or more characters between matching parentheses + (`(...)`), including a `)` character only if it is backslash-escaped. + +Although [link titles] may span multiple lines, they may not contain +a [blank line]. + +An [inline link](@) consists of a [link text] followed immediately +by a left parenthesis `(`, optional [whitespace], an optional +[link destination], an optional [link title] separated from the link +destination by [whitespace], optional [whitespace], and a right +parenthesis `)`. The link's text consists of the inlines contained +in the [link text] (excluding the enclosing square brackets). +The link's URI consists of the link destination, excluding enclosing +`<...>` if present, with backslash-escapes in effect as described +above. The link's title consists of the link title, excluding its +enclosing delimiters, with backslash-escapes in effect as described +above. + +Here is a simple inline link: + +```````````````````````````````` example +[link](/uri "title") +. +

link

+```````````````````````````````` + + +The title may be omitted: + +```````````````````````````````` example +[link](/uri) +. +

link

+```````````````````````````````` + + +Both the title and the destination may be omitted: + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link](<>) +. +

link

+```````````````````````````````` + + +The destination cannot contain spaces or line breaks, +even if enclosed in pointy brackets: + +```````````````````````````````` example +[link](/my uri) +. +

[link](/my uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

[link](</my uri>)

+```````````````````````````````` + + +```````````````````````````````` example +[link](foo +bar) +. +

[link](foo +bar)

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

[link]()

+```````````````````````````````` + +Parentheses inside the link destination may be escaped: + +```````````````````````````````` example +[link](\(foo\)) +. +

link

+```````````````````````````````` + +Any number of parentheses are allowed without escaping, as long as they are +balanced: + +```````````````````````````````` example +[link](foo(and(bar))) +. +

link

+```````````````````````````````` + +However, if you have unbalanced parentheses, you need to escape or use the +`<...>` form: + +```````````````````````````````` example +[link](foo\(and\(bar\)) +. +

link

+```````````````````````````````` + + +```````````````````````````````` example +[link]() +. +

link

+```````````````````````````````` + + +Parentheses and other symbols can also be escaped, as usual +in Markdown: + +```````````````````````````````` example +[link](foo\)\:) +. +

link

+```````````````````````````````` + + +A link can contain fragment identifiers and queries: + +```````````````````````````````` example +[link](#fragment) + +[link](http://example.com#fragment) + +[link](http://example.com?foo=3#frag) +. +

link

+

link

+

link

+```````````````````````````````` + + +Note that a backslash before a non-escapable character is +just a backslash: + +```````````````````````````````` example +[link](foo\bar) +. +

link

+```````````````````````````````` + + +URL-escaping should be left alone inside the destination, as all +URL-escaped characters are also valid URL characters. Entity and +numerical character references in the destination will be parsed +into the corresponding Unicode code points, as usual. These may +be optionally URL-escaped when written as HTML, but this spec +does not enforce any particular policy for rendering URLs in +HTML or other formats. Renderers may make different decisions +about how to escape or normalize URLs in the output. + +```````````````````````````````` example +[link](foo%20bä) +. +

link

+```````````````````````````````` + + +Note that, because titles can often be parsed as destinations, +if you try to omit the destination and keep the title, you'll +get unexpected results: + +```````````````````````````````` example +[link]("title") +. +

link

+```````````````````````````````` + + +Titles may be in single quotes, double quotes, or parentheses: + +```````````````````````````````` example +[link](/url "title") +[link](/url 'title') +[link](/url (title)) +. +

link +link +link

+```````````````````````````````` + + +Backslash escapes and entity and numeric character references +may be used in titles: + +```````````````````````````````` example +[link](/url "title \""") +. +

link

+```````````````````````````````` + + +Titles must be separated from the link using a [whitespace]. +Other [Unicode whitespace] like non-breaking space doesn't work. + +```````````````````````````````` example +[link](/url "title") +. +

link

+```````````````````````````````` + + +Nested balanced quotes are not allowed without escaping: + +```````````````````````````````` example +[link](/url "title "and" title") +. +

[link](/url "title "and" title")

+```````````````````````````````` + + +But it is easy to work around this by using a different quote type: + +```````````````````````````````` example +[link](/url 'title "and" title') +. +

link

+```````````````````````````````` + + +(Note: `Markdown.pl` did allow double quotes inside a double-quoted +title, and its test suite included a test demonstrating this. +But it is hard to see a good rationale for the extra complexity this +brings, since there are already many ways---backslash escaping, +entity and numeric character references, or using a different +quote type for the enclosing title---to write titles containing +double quotes. `Markdown.pl`'s handling of titles has a number +of other strange features. For example, it allows single-quoted +titles in inline links, but not reference links. And, in +reference links but not inline links, it allows a title to begin +with `"` and end with `)`. `Markdown.pl` 1.0.1 even allows +titles with no closing quotation mark, though 1.0.2b8 does not. +It seems preferable to adopt a simple, rational rule that works +the same way in inline links and link reference definitions.) + +[Whitespace] is allowed around the destination and title: + +```````````````````````````````` example +[link]( /uri + "title" ) +. +

link

+```````````````````````````````` + + +But it is not allowed between the link text and the +following parenthesis: + +```````````````````````````````` example +[link] (/uri) +. +

[link] (/uri)

+```````````````````````````````` + + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]](/uri) +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link] bar](/uri) +. +

[link] bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[link [bar](/uri) +. +

[link bar

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar](/uri) +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*](/uri) +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)](/uri) +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)](/uri) +. +

[foo bar](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo *[bar [baz](/uri)](/uri)*](/uri) +. +

[foo [bar baz](/uri)](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +![[[foo](uri1)](uri2)](uri3) +. +

[foo](uri2)

+```````````````````````````````` + + +These cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*](/uri) +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar](baz*) +. +

foo *bar

+```````````````````````````````` + + +Note that brackets that *aren't* part of links do not take +precedence: + +```````````````````````````````` example +*foo [bar* baz] +. +

foo [bar baz]

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`](/uri)` +. +

[foo](/uri)

+```````````````````````````````` + + +```````````````````````````````` example +[foo +. +

[foohttp://example.com/?search=](uri)

+```````````````````````````````` + + +There are three kinds of [reference link](@)s: +[full](#full-reference-link), [collapsed](#collapsed-reference-link), +and [shortcut](#shortcut-reference-link). + +A [full reference link](@) +consists of a [link text] immediately followed by a [link label] +that [matches] a [link reference definition] elsewhere in the document. + +A [link label](@) begins with a left bracket (`[`) and ends +with the first right bracket (`]`) that is not backslash-escaped. +Between these brackets there must be at least one [non-whitespace character]. +Unescaped square bracket characters are not allowed in +[link labels]. A link label can have at most 999 +characters inside the square brackets. + +One label [matches](@) +another if and only if their normalized forms are equal. To normalize a +label, perform the *Unicode case fold* and collapse consecutive internal +[whitespace] to a single space. If there are multiple +matching reference link definitions, the one that comes first in the +document is used. (It is desirable in such cases to emit a warning.) + +The contents of the first link label are parsed as inlines, which are +used as the link's text. The link's URI and title are provided by the +matching [link reference definition]. + +Here is a simple example: + +```````````````````````````````` example +[foo][bar] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +The rules for the [link text] are the same as with +[inline links]. Thus: + +The link text may contain balanced brackets, but not unbalanced ones, +unless they are escaped: + +```````````````````````````````` example +[link [foo [bar]]][ref] + +[ref]: /uri +. +

link [foo [bar]]

+```````````````````````````````` + + +```````````````````````````````` example +[link \[bar][ref] + +[ref]: /uri +. +

link [bar

+```````````````````````````````` + + +The link text may contain inline content: + +```````````````````````````````` example +[link *foo **bar** `#`*][ref] + +[ref]: /uri +. +

link foo bar #

+```````````````````````````````` + + +```````````````````````````````` example +[![moon](moon.jpg)][ref] + +[ref]: /uri +. +

moon

+```````````````````````````````` + + +However, links may not contain other links, at any level of nesting. + +```````````````````````````````` example +[foo [bar](/uri)][ref] + +[ref]: /uri +. +

[foo bar]ref

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar [baz][ref]*][ref] + +[ref]: /uri +. +

[foo bar baz]ref

+```````````````````````````````` + + +(In the examples above, we have two [shortcut reference links] +instead of one [full reference link].) + +The following cases illustrate the precedence of link text grouping over +emphasis grouping: + +```````````````````````````````` example +*[foo*][ref] + +[ref]: /uri +. +

*foo*

+```````````````````````````````` + + +```````````````````````````````` example +[foo *bar][ref] + +[ref]: /uri +. +

foo *bar

+```````````````````````````````` + + +These cases illustrate the precedence of HTML tags, code spans, +and autolinks over link grouping: + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foo

+```````````````````````````````` + + +```````````````````````````````` example +[foo`][ref]` + +[ref]: /uri +. +

[foo][ref]

+```````````````````````````````` + + +```````````````````````````````` example +[foo + +[ref]: /uri +. +

[foohttp://example.com/?search=][ref]

+```````````````````````````````` + + +Matching is case-insensitive: + +```````````````````````````````` example +[foo][BaR] + +[bar]: /url "title" +. +

foo

+```````````````````````````````` + + +Unicode case fold is used: + +```````````````````````````````` example +[Толпой][Толпой] is a Russian word. + +[ТОЛПОЙ]: /url +. +

Толпой is a Russian word.

+```````````````````````````````` + + +Consecutive internal [whitespace] is treated as one space for +purposes of determining matching: + +```````````````````````````````` example +[Foo + bar]: /url + +[Baz][Foo bar] +. +

Baz

+```````````````````````````````` + + +No [whitespace] is allowed between the [link text] and the +[link label]: + +```````````````````````````````` example +[foo] [bar] + +[bar]: /url "title" +. +

[foo] bar

+```````````````````````````````` + + +```````````````````````````````` example +[foo] +[bar] + +[bar]: /url "title" +. +

[foo] +bar

+```````````````````````````````` + + +This is a departure from John Gruber's original Markdown syntax +description, which explicitly allows whitespace between the link +text and the link label. It brings reference links in line with +[inline links], which (according to both original Markdown and +this spec) cannot have whitespace after the link text. More +importantly, it prevents inadvertent capture of consecutive +[shortcut reference links]. If whitespace is allowed between the +link text and the link label, then in the following we will have +a single reference link, not two shortcut reference links, as +intended: + +``` markdown +[foo] +[bar] + +[foo]: /url1 +[bar]: /url2 +``` + +(Note that [shortcut reference links] were introduced by Gruber +himself in a beta version of `Markdown.pl`, but never included +in the official syntax description. Without shortcut reference +links, it is harmless to allow space between the link text and +link label; but once shortcut references are introduced, it is +too dangerous to allow this, as it frequently leads to +unintended results.) + +When there are multiple matching [link reference definitions], +the first is used: + +```````````````````````````````` example +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] +. +

bar

+```````````````````````````````` + + +Note that matching is performed on normalized strings, not parsed +inline content. So the following does not match, even though the +labels define equivalent inline content: + +```````````````````````````````` example +[bar][foo\!] + +[foo!]: /url +. +

[bar][foo!]

+```````````````````````````````` + + +[Link labels] cannot contain brackets, unless they are +backslash-escaped: + +```````````````````````````````` example +[foo][ref[] + +[ref[]: /uri +. +

[foo][ref[]

+

[ref[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref[bar]] + +[ref[bar]]: /uri +. +

[foo][ref[bar]]

+

[ref[bar]]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[[[foo]]] + +[[[foo]]]: /url +. +

[[[foo]]]

+

[[[foo]]]: /url

+```````````````````````````````` + + +```````````````````````````````` example +[foo][ref\[] + +[ref\[]: /uri +. +

foo

+```````````````````````````````` + + +Note that in this example `]` is not backslash-escaped: + +```````````````````````````````` example +[bar\\]: /uri + +[bar\\] +. +

bar\

+```````````````````````````````` + + +A [link label] must contain at least one [non-whitespace character]: + +```````````````````````````````` example +[] + +[]: /uri +. +

[]

+

[]: /uri

+```````````````````````````````` + + +```````````````````````````````` example +[ + ] + +[ + ]: /uri +. +

[ +]

+

[ +]: /uri

+```````````````````````````````` + + +A [collapsed reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document, followed by the string `[]`. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title are +provided by the matching reference link definition. Thus, +`[foo][]` is equivalent to `[foo][foo]`. + +```````````````````````````````` example +[foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + + +As with full reference links, [whitespace] is not +allowed between the two sets of brackets: + +```````````````````````````````` example +[foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +A [shortcut reference link](@) +consists of a [link label] that [matches] a +[link reference definition] elsewhere in the +document and is not followed by `[]` or a link label. +The contents of the first link label are parsed as inlines, +which are used as the link's text. The link's URI and title +are provided by the matching link reference definition. +Thus, `[foo]` is equivalent to `[foo][]`. + +```````````````````````````````` example +[foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +[*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +[[*foo* bar]] + +[*foo* bar]: /url "title" +. +

[foo bar]

+```````````````````````````````` + + +```````````````````````````````` example +[[bar [foo] + +[foo]: /url +. +

[[bar foo

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +[Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +A space after the link text should be preserved: + +```````````````````````````````` example +[foo] bar + +[foo]: /url +. +

foo bar

+```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening bracket to avoid links: + +```````````````````````````````` example +\[foo] + +[foo]: /url "title" +. +

[foo]

+```````````````````````````````` + + +Note that this is a link, because a link label ends with the first +following closing bracket: + +```````````````````````````````` example +[foo*]: /url + +*[foo*] +. +

*foo*

+```````````````````````````````` + + +Full and collapsed references take precedence over shortcut +references: + +```````````````````````````````` example +[foo][bar] + +[foo]: /url1 +[bar]: /url2 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo][] + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +Inline links also take precedence: + +```````````````````````````````` example +[foo]() + +[foo]: /url1 +. +

foo

+```````````````````````````````` + +```````````````````````````````` example +[foo](not a link) + +[foo]: /url1 +. +

foo(not a link)

+```````````````````````````````` + +In the following case `[bar][baz]` is parsed as a reference, +`[foo]` as normal text: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url +. +

[foo]bar

+```````````````````````````````` + + +Here, though, `[foo][bar]` is parsed as a reference, since +`[bar]` is defined: + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 +. +

foobaz

+```````````````````````````````` + + +Here `[foo]` is not parsed as a shortcut reference, because it +is followed by a link label (even though `[bar]` is not defined): + +```````````````````````````````` example +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 +. +

[foo]bar

+```````````````````````````````` + + + +## Images + +Syntax for images is like the syntax for links, with one +difference. Instead of [link text], we have an +[image description](@). The rules for this are the +same as for [link text], except that (a) an +image description starts with `![` rather than `[`, and +(b) an image description may contain links. +An image description has inline elements +as its contents. When an image is rendered to HTML, +this is standardly used as the image's `alt` attribute. + +```````````````````````````````` example +![foo](/url "title") +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo ![bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo [bar](/url)](/url2) +. +

foo bar

+```````````````````````````````` + + +Though this spec is concerned with parsing, not rendering, it is +recommended that in rendering to HTML, only the plain string content +of the [image description] be used. Note that in +the above example, the alt attribute's value is `foo bar`, not `foo +[bar](/url)` or `foo <a href="/url">bar</a>`. Only the plain string +content is rendered, without formatting. + +```````````````````````````````` example +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" +. +

foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo](train.jpg) +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +My ![foo bar](/path/to/train.jpg "title" ) +. +

My foo bar

+```````````````````````````````` + + +```````````````````````````````` example +![foo]() +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![](/url) +. +

+```````````````````````````````` + + +Reference-style: + +```````````````````````````````` example +![foo][bar] + +[bar]: /url +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![foo][bar] + +[BAR]: /url +. +

foo

+```````````````````````````````` + + +Collapsed: + +```````````````````````````````` example +![foo][] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar][] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +The labels are case-insensitive: + +```````````````````````````````` example +![Foo][] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +As with reference links, [whitespace] is not allowed +between the two sets of brackets: + +```````````````````````````````` example +![foo] +[] + +[foo]: /url "title" +. +

foo +[]

+```````````````````````````````` + + +Shortcut: + +```````````````````````````````` example +![foo] + +[foo]: /url "title" +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +![*foo* bar] + +[*foo* bar]: /url "title" +. +

foo bar

+```````````````````````````````` + + +Note that link labels cannot contain unescaped brackets: + +```````````````````````````````` example +![[foo]] + +[[foo]]: /url "title" +. +

![[foo]]

+

[[foo]]: /url "title"

+```````````````````````````````` + + +The link labels are case-insensitive: + +```````````````````````````````` example +![Foo] + +[foo]: /url "title" +. +

Foo

+```````````````````````````````` + + +If you just want bracketed text, you can backslash-escape the +opening `!` and `[`: + +```````````````````````````````` example +\!\[foo] + +[foo]: /url "title" +. +

![foo]

+```````````````````````````````` + + +If you want a link after a literal `!`, backslash-escape the +`!`: + +```````````````````````````````` example +\![foo] + +[foo]: /url "title" +. +

!foo

+```````````````````````````````` + + +## Autolinks + +[Autolink](@)s are absolute URIs and email addresses inside +`<` and `>`. They are parsed as links, with the URL or email address +as the link label. + +A [URI autolink](@) consists of `<`, followed by an +[absolute URI] not containing `<`, followed by `>`. It is parsed as +a link to the URI, with the URI as the link's label. + +An [absolute URI](@), +for these purposes, consists of a [scheme] followed by a colon (`:`) +followed by zero or more characters other than ASCII +[whitespace] and control characters, `<`, and `>`. If +the URI includes these characters, they must be percent-encoded +(e.g. `%20` for a space). + +For purposes of this spec, a [scheme](@) is any sequence +of 2--32 characters beginning with an ASCII letter and followed +by any combination of ASCII letters, digits, or the symbols plus +("+"), period ("."), or hyphen ("-"). + +Here are some valid autolinks: + +```````````````````````````````` example + +. +

http://foo.bar.baz

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://foo.bar.baz/test?q=hello&id=22&boolean

+```````````````````````````````` + + +```````````````````````````````` example + +. +

irc://foo.bar:2233/baz

+```````````````````````````````` + + +Uppercase is also fine: + +```````````````````````````````` example + +. +

MAILTO:FOO@BAR.BAZ

+```````````````````````````````` + + +Note that many strings that count as [absolute URIs] for +purposes of this spec are not valid URIs, because their +schemes are not registered or because of other problems +with their syntax: + +```````````````````````````````` example + +. +

a+b+c:d

+```````````````````````````````` + + +```````````````````````````````` example + +. +

made-up-scheme://foo,bar

+```````````````````````````````` + + +```````````````````````````````` example + +. +

http://../

+```````````````````````````````` + + +```````````````````````````````` example + +. +

localhost:5001/foo

+```````````````````````````````` + + +Spaces are not allowed in autolinks: + +```````````````````````````````` example + +. +

<http://foo.bar/baz bim>

+```````````````````````````````` + + +Backslash-escapes do not work inside autolinks: + +```````````````````````````````` example + +. +

http://example.com/\[\

+```````````````````````````````` + + +An [email autolink](@) +consists of `<`, followed by an [email address], +followed by `>`. The link's label is the email address, +and the URL is `mailto:` followed by the email address. + +An [email address](@), +for these purposes, is anything that matches +the [non-normative regex from the HTML5 +spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)): + + /^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])? + (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/ + +Examples of email autolinks: + +```````````````````````````````` example + +. +

foo@bar.example.com

+```````````````````````````````` + + +```````````````````````````````` example + +. +

foo+special@Bar.baz-bar0.com

+```````````````````````````````` + + +Backslash-escapes do not work inside email autolinks: + +```````````````````````````````` example + +. +

<foo+@bar.example.com>

+```````````````````````````````` + + +These are not autolinks: + +```````````````````````````````` example +<> +. +

<>

+```````````````````````````````` + + +```````````````````````````````` example +< http://foo.bar > +. +

< http://foo.bar >

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<m:abc>

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<foo.bar.baz>

+```````````````````````````````` + + +```````````````````````````````` example +http://example.com +. +

http://example.com

+```````````````````````````````` + + +```````````````````````````````` example +foo@bar.example.com +. +

foo@bar.example.com

+```````````````````````````````` + + +## Raw HTML + +Text between `<` and `>` that looks like an HTML tag is parsed as a +raw HTML tag and will be rendered in HTML without escaping. +Tag and attribute names are not limited to current HTML tags, +so custom tags (and even, say, DocBook tags) may be used. + +Here is the grammar for tags: + +A [tag name](@) consists of an ASCII letter +followed by zero or more ASCII letters, digits, or +hyphens (`-`). + +An [attribute](@) consists of [whitespace], +an [attribute name], and an optional +[attribute value specification]. + +An [attribute name](@) +consists of an ASCII letter, `_`, or `:`, followed by zero or more ASCII +letters, digits, `_`, `.`, `:`, or `-`. (Note: This is the XML +specification restricted to ASCII. HTML5 is laxer.) + +An [attribute value specification](@) +consists of optional [whitespace], +a `=` character, optional [whitespace], and an [attribute +value]. + +An [attribute value](@) +consists of an [unquoted attribute value], +a [single-quoted attribute value], or a [double-quoted attribute value]. + +An [unquoted attribute value](@) +is a nonempty string of characters not +including spaces, `"`, `'`, `=`, `<`, `>`, or `` ` ``. + +A [single-quoted attribute value](@) +consists of `'`, zero or more +characters not including `'`, and a final `'`. + +A [double-quoted attribute value](@) +consists of `"`, zero or more +characters not including `"`, and a final `"`. + +An [open tag](@) consists of a `<` character, a [tag name], +zero or more [attributes], optional [whitespace], an optional `/` +character, and a `>` character. + +A [closing tag](@) consists of the string `</`, a +[tag name], optional [whitespace], and the character `>`. + +An [HTML comment](@) consists of `<!--` + *text* + `-->`, +where *text* does not start with `>` or `->`, does not end with `-`, +and does not contain `--`. (See the +[HTML5 spec](http://www.w3.org/TR/html5/syntax.html#comments).) + +A [processing instruction](@) +consists of the string `<?`, a string +of characters not including the string `?>`, and the string +`?>`. 
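+ +A [declaration](@) consists of the +string `<!`, a name consisting of one or more uppercase ASCII letters, +[whitespace], a string of characters not including the +character `>`, and the character `>`. + +A [CDATA section](@) consists of +the string `<![CDATA[`, a string of characters not including the string +`]]>`, and the string `]]>`. + +An [HTML tag](@) consists of an [open tag], a [closing tag], +an [HTML comment], a [processing instruction], a [declaration], +or a [CDATA section]. + +Here are some simple open tags: + +```````````````````````````````` example + +. +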

+```````````````````````````````` + + +Empty elements: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +[Whitespace] is allowed: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +With attributes: + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Custom tag names can be used: + +```````````````````````````````` example +Foo +. +

Foo

+```````````````````````````````` + + +Illegal tag names, not parsed as HTML: + +```````````````````````````````` example +<33> <__> +. +

<33> <__>

+```````````````````````````````` + + +Illegal attribute names: + +```````````````````````````````` example +
+. +

<a h*#ref="hi">

+```````````````````````````````` + + +Illegal attribute values: + +```````````````````````````````` example +
+. +

</a href="foo">

+```````````````````````````````` + + +Comments: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo <!-- not a comment -- two hyphens -->

+```````````````````````````````` + + +Not comments: + +```````````````````````````````` example +foo foo --> + +foo +. +

foo <!--> foo -->

+

foo <!-- foo--->

+```````````````````````````````` + + +Processing instructions: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +Declarations: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +CDATA sections: + +```````````````````````````````` example +foo &<]]> +. +

foo &<]]>

+```````````````````````````````` + + +Entity and numeric character references are preserved in HTML +attributes: + +```````````````````````````````` example +foo
+. +

foo

+```````````````````````````````` + + +Backslash escapes do not work in HTML attributes: + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example + +. +

<a href=""">

+```````````````````````````````` + + +## Hard line breaks + +A line break (not in a code span or HTML tag) that is preceded +by two or more spaces and does not occur at the end of a block +is parsed as a [hard line break](@) (rendered +in HTML as a `<br />` tag): + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +For a more visible alternative, a backslash before the +[line ending] may be used instead of two spaces: + +```````````````````````````````` example +foo\ +baz +. +

foo
+baz

+```````````````````````````````` + + +More than two spaces can be used: + +```````````````````````````````` example +foo +baz +. +

foo
+baz

+```````````````````````````````` + + +Leading spaces at the beginning of the next line are ignored: + +```````````````````````````````` example +foo + bar +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +foo\ + bar +. +

foo
+bar

+```````````````````````````````` + + +Line breaks can occur inside emphasis, links, and other constructs +that allow inline content: + +```````````````````````````````` example +*foo +bar* +. +

foo
+bar

+```````````````````````````````` + + +```````````````````````````````` example +*foo\ +bar* +. +

foo
+bar

+```````````````````````````````` + + +Line breaks do not occur inside code spans + +```````````````````````````````` example +`code +span` +. +

code span

+```````````````````````````````` + + +```````````````````````````````` example +`code\ +span` +. +

code\ span

+```````````````````````````````` + + +or HTML tags: + +```````````````````````````````` example +
+. +

+```````````````````````````````` + + +```````````````````````````````` example + +. +

+```````````````````````````````` + + +Hard line breaks are for separating inline content within a block. +Neither syntax for hard line breaks works at the end of a paragraph or +other block element: + +```````````````````````````````` example +foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +foo +. +

foo

+```````````````````````````````` + + +```````````````````````````````` example +### foo\ +. +

foo\

+```````````````````````````````` + + +```````````````````````````````` example +### foo +. +

foo

+```````````````````````````````` + + +## Soft line breaks + +A regular line break (not in a code span or HTML tag) that is not +preceded by two or more spaces or a backslash is parsed as a +[softbreak](@). (A softbreak may be rendered in HTML either as a +[line ending] or as a space. The result will be the same in +browsers. In the examples here, a [line ending] will be used.) + +```````````````````````````````` example +foo +baz +. +

foo +baz

+```````````````````````````````` + + +Spaces at the end of the line and beginning of the next line are +removed: + +```````````````````````````````` example +foo + baz +. +

foo +baz

+```````````````````````````````` + + +A conforming parser may render a soft line break in HTML either as a +line break or as a space. + +A renderer may also provide an option to render soft line breaks +as hard line breaks. + +## Textual content + +Any characters not given an interpretation by the above rules will +be parsed as plain textual content. + +```````````````````````````````` example +hello $.;'there +. +

hello $.;'there

+```````````````````````````````` + + +```````````````````````````````` example +Foo χρῆν +. +

Foo χρῆν

+```````````````````````````````` + + +Internal spaces are preserved verbatim: + +```````````````````````````````` example +Multiple spaces +. +

Multiple spaces

+```````````````````````````````` + + + + +# Appendix: A parsing strategy + +In this appendix we describe some features of the parsing strategy +used in the CommonMark reference implementations. + +## Overview + +Parsing has two phases: + +1. In the first phase, lines of input are consumed and the block +structure of the document---its division into paragraphs, block quotes, +list items, and so on---is constructed. Text is assigned to these +blocks but not parsed. Link reference definitions are parsed and a +map of links is constructed. + +2. In the second phase, the raw text contents of paragraphs and headings +are parsed into sequences of Markdown inline elements (strings, +code spans, links, emphasis, and so on), using the map of link +references constructed in phase 1. + +At each point in processing, the document is represented as a tree of +**blocks**. The root of the tree is a `document` block. The `document` +may have any number of other blocks as **children**. These children +may, in turn, have other blocks as children. The last child of a block +is normally considered **open**, meaning that subsequent lines of input +can alter its contents. (Blocks that are not open are **closed**.) +Here, for example, is a possible document tree, with the open blocks +marked by arrows: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 1: block structure + +Each line that is processed has an effect on this tree. The line is +analyzed and, depending on its contents, the document may be altered +in one or more of the following ways: + +1. One or more open blocks may be closed. +2. One or more new blocks may be created as children of the + last open block. +3. Text may be added to the last (deepest) open block remaining + on the tree. 
+ +Once a line has been incorporated into the tree in this way, +it can be discarded, so input can be read in a stream. + +For each line, we follow this procedure: + +1. First we iterate through the open blocks, starting with the +root document, and descending through last children down to the last +open block. Each block imposes a condition that the line must satisfy +if the block is to remain open. For example, a block quote requires a +`>` character. A paragraph requires a non-blank line. +In this phase we may match all or just some of the open +blocks. But we cannot close unmatched blocks yet, because we may have a +[lazy continuation line]. + +2. Next, after consuming the continuation markers for existing +blocks, we look for new block starts (e.g. `>` for a block quote). +If we encounter a new block start, we close any blocks unmatched +in step 1 before creating the new block as a child of the last +matched block. + +3. Finally, we look at the remainder of the line (after block +markers like `>`, list markers, and indentation have been consumed). +This is text that can be incorporated into the last open +block (a paragraph, code block, heading, or raw HTML). + +Setext headings are formed when we see a line of a paragraph +that is a [setext heading underline]. + +Reference link definitions are detected when a paragraph is closed; +the accumulated text lines are parsed to see if they begin with +one or more reference link definitions. Any remainder becomes a +normal paragraph. + +We can see how this works by considering how the tree above is +generated by four lines of Markdown: + +``` markdown +> Lorem ipsum dolor +sit amet. 
+> - Qui *quodsi iracundia* +> - aliquando id +``` + +At the outset, our document model is just + +``` tree +-> document +``` + +The first line of our text, + +``` markdown +> Lorem ipsum dolor +``` + +causes a `block_quote` block to be created as a child of our +open `document` block, and a `paragraph` block as a child of +the `block_quote`. Then the text is added to the last open +block, the `paragraph`: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor" +``` + +The next line, + +``` markdown +sit amet. +``` + +is a "lazy continuation" of the open `paragraph`, so it gets added +to the paragraph's text: + +``` tree +-> document + -> block_quote + -> paragraph + "Lorem ipsum dolor\nsit amet." +``` + +The third line, + +``` markdown +> - Qui *quodsi iracundia* +``` + +causes the `paragraph` block to be closed, and a new `list` block +opened as a child of the `block_quote`. A `list_item` is also +added as a child of the `list`, and a `paragraph` as a child of +the `list_item`. The text is then added to the new `paragraph`: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + -> list_item + -> paragraph + "Qui *quodsi iracundia*" +``` + +The fourth line, + +``` markdown +> - aliquando id +``` + +causes the `list_item` (and its child the `paragraph`) to be closed, +and a new `list_item` opened up as child of the `list`. A `paragraph` +is added as a child of the new `list_item`, to contain the text. +We thus obtain the final tree: + +``` tree +-> document + -> block_quote + paragraph + "Lorem ipsum dolor\nsit amet." + -> list (type=bullet tight=true bullet_char=-) + list_item + paragraph + "Qui *quodsi iracundia*" + -> list_item + -> paragraph + "aliquando id" +``` + +## Phase 2: inline structure + +Once all of the input has been parsed, all open blocks are closed. 
+ +We then "walk the tree," visiting every node, and parse raw +string contents of paragraphs and headings as inlines. At this +point we have seen all the link reference definitions, so we can +resolve reference links as we go. + +``` tree +document + block_quote + paragraph + str "Lorem ipsum dolor" + softbreak + str "sit amet." + list (type=bullet tight=true bullet_char=-) + list_item + paragraph + str "Qui " + emph + str "quodsi iracundia" + list_item + paragraph + str "aliquando id" +``` + +Notice how the [line ending] in the first paragraph has +been parsed as a `softbreak`, and the asterisks in the first list item +have become an `emph`. + +### An algorithm for parsing nested emphasis and links + +By far the trickiest part of inline parsing is handling emphasis, +strong emphasis, links, and images. This is done using the following +algorithm. + +When we're parsing inlines and we hit either + +- a run of `*` or `_` characters, or +- a `[` or `![` + +we insert a text node with these symbols as its literal content, and we +add a pointer to this text node to the [delimiter stack](@). + +The [delimiter stack] is a doubly linked list. Each +element contains a pointer to a text node, plus information about + +- the type of delimiter (`[`, `![`, `*`, `_`) +- the number of delimiters, +- whether the delimiter is "active" (all are active to start), and +- whether the delimiter is a potential opener, a potential closer, + or both (which depends on what sort of characters precede + and follow the delimiters). + +When we hit a `]` character, we call the *look for link or image* +procedure (see below). + +When we hit the end of the input, we call the *process emphasis* +procedure (see below), with `stack_bottom` = NULL. + +#### *look for link or image* + +Starting at the top of the delimiter stack, we look backwards +through the stack for an opening `[` or `![` delimiter. + +- If we don't find one, we return a literal text node `]`. 
+ +- If we do find one, but it's not *active*, we remove the inactive + delimiter from the stack, and return a literal text node `]`. + +- If we find one and it's active, then we parse ahead to see if + we have an inline link/image, reference link/image, compact reference + link/image, or shortcut reference link/image. + + + If we don't, then we remove the opening delimiter from the + delimiter stack and return a literal text node `]`. + + + If we do, then + + * We return a link or image node whose children are the inlines + after the text node pointed to by the opening delimiter. + + * We run *process emphasis* on these inlines, with the `[` opener + as `stack_bottom`. + + * We remove the opening delimiter. + + * If we have a link (and not an image), we also set all + `[` delimiters before the opening delimiter to *inactive*. (This + will prevent us from getting links within links.) + +#### *process emphasis* + +Parameter `stack_bottom` sets a lower bound to how far we +descend in the [delimiter stack]. If it is NULL, we can +go all the way to the bottom. Otherwise, we stop before +visiting `stack_bottom`. + +Let `current_position` point to the element on the [delimiter stack] +just above `stack_bottom` (or the first element if `stack_bottom` +is NULL). + +We keep track of the `openers_bottom` for each delimiter +type (`*`, `_`). Initialize this to `stack_bottom`. + +Then we repeat the following until we run out of potential +closers: + +- Move `current_position` forward in the delimiter stack (if needed) + until we find the first potential closer with delimiter `*` or `_`. + (This will be the potential closer closest + to the beginning of the input -- the first one in parse order.) + +- Now, look back in the stack (staying above `stack_bottom` and + the `openers_bottom` for this delimiter type) for the + first matching potential opener ("matching" means same delimiter). 
+ +- If one is found: + + + Figure out whether we have emphasis or strong emphasis: + if both closer and opener spans have length >= 2, we have + strong, otherwise regular. + + + Insert an emph or strong emph node accordingly, after + the text node corresponding to the opener. + + + Remove any delimiters between the opener and closer from + the delimiter stack. + + + Remove 1 (for regular emph) or 2 (for strong emph) delimiters + from the opening and closing text nodes. If they become empty + as a result, remove them and remove the corresponding element + of the delimiter stack. If the closing node is removed, reset + `current_position` to the next element in the stack. + +- If none is found: + + + Set `openers_bottom` to the element before `current_position`. + (We know that there are no openers for this kind of closer up to and + including this point, so this puts a lower bound on future searches.) + + + If the closer at `current_position` is not a potential opener, + remove it from the delimiter stack (since we know it can't + be a closer either). + + + Advance `current_position` to the next element in the stack. + +After we're done, we remove all delimiters above `stack_bottom` from the +delimiter stack. + diff --git a/lib/markd/spec/markd_spec.cr b/lib/markd/spec/markd_spec.cr new file mode 100644 index 000000000000..3f4d2156478c --- /dev/null +++ b/lib/markd/spec/markd_spec.cr @@ -0,0 +1,20 @@ +require "./spec_helper" + +# CommonMark spec examples +describe_spec("fixtures/spec.txt") + +# Smart punctuation examples +describe_spec("fixtures/smart_punct.txt", smart: true) + +# Regression examples +describe_spec("fixtures/regression.txt") + +describe Markd do + # Thanks to Ryan Westlund for the feedback via email. + it "should escape unsafe html" do + raw = %Q{```">\n```} + html = %Q{
\n}
+
+    Markd.to_html(raw).should eq(html)
+  end
+end
diff --git a/lib/markd/spec/spec_helper.cr b/lib/markd/spec/spec_helper.cr
new file mode 100644
index 000000000000..18287caf6ff1
--- /dev/null
+++ b/lib/markd/spec/spec_helper.cr
@@ -0,0 +1,124 @@
+require "spec"
+require "../src/markd"
+
+# Defines one spec group per section of the fixture *file*.
+#
+# *file* is resolved relative to the directory of this helper, *smart* turns
+# on the `smart` option for every example, and *render* additionally prints a
+# summary of the sections and example counts before the groups are defined.
+def describe_spec(file, smart = false, render = false)
+  specs = extract_spec_tests(file)
+
+  # 1-based section numbers to leave out; empty by default.
+  skip_examples = [] of Int32
+
+  if render
+    puts "Run [#{file}] examples"
+    examples_count = 0
+    section_count = 0
+    specs.each_with_index do |(section, examples), index|
+      section = "#{(index + 1).to_s.rjust(2)}. #{section} (#{examples.size})"
+      if skip_examples.includes?(index + 1)
+        puts section + " [SKIP]"
+        next
+      end
+      section_count += 1
+      examples_count += examples.size
+      puts section
+    end
+    puts "Total #{section_count} describes and #{examples_count} examples"
+  end
+
+  specs.each_with_index do |(section, examples), index|
+    no = index + 1
+    next if skip_examples.includes?(no)
+    assert_section(file, section, examples, smart)
+  end
+end
+
+# Wraps all *examples* of one fixture *section* in a `describe` group.
+def assert_section(file, section, examples, smart)
+  describe section do
+    examples.each do |index, example|
+      assert_exapmle(file, section, index, example, smart)
+    end
+  end
+end
+
+# Defines a single `it` that renders the example's Markdown and compares the
+# result with the expected HTML. `→` in the fixture stands for a tab.
+def assert_exapmle(file, section, index, example, smart)
+  markdown = example["markdown"].gsub("→", "\t").chomp
+  html = example["html"].gsub("→", "\t")
+  line = example["line"].to_i
+
+  options = Markd::Options.new
+  options.smart = true if smart
+  it "- #{index}\n#{show_space(markdown)}", file, line do
+    output = Markd.to_html(markdown, options)
+    output.should eq(html), file: file, line: line
+  end
+end
+
+# Parses a CommonMark-style fixture into `{section => {index => example}}`.
+#
+# Each example hash has the keys "line", "markdown" and "html". An example
+# opens with a fence of 32 backticks followed by " example", a line holding
+# only "." separates the Markdown input from the expected HTML, and a fence of
+# 32 backticks closes it. Parsing stops at the "<!-- END TESTS -->" marker or
+# at end of file, and sections without examples are dropped.
+def extract_spec_tests(file)
+  examples = {} of String => Hash(Int32, Hash(String, String))
+
+  current_section = 0
+  example_count = 0
+  test_start = false
+  result_start = false
+
+  path = File.expand_path(File.join("..", file), __FILE__)
+  File.open(path) do |f|
+    line_number = 0
+    # `gets` returns nil at end of file, so no EOF rescue is needed.
+    while line = f.gets
+      line_number += 1
+      line = line.gsub(/\r\n?/, "\n")
+      break if line.includes?("<!-- END TESTS -->")
+
+      if !test_start && !result_start && (match = line.match(/^\#{1,6}\s+(.*)$/))
+        # A heading outside of an example starts a new section.
+        current_section = match[1]
+        examples[current_section] = {} of Int32 => Hash(String, String)
+        example_count = 0
+      else
+        if !test_start && !result_start && line =~ /^`{32} example$/
+          test_start = true
+        elsif test_start && !result_start && line =~ /^\.$/
+          test_start = false
+          result_start = true
+        elsif !test_start && result_start && line =~ /^`{32}/
+          result_start = false
+          example_count += 1
+        elsif test_start && !result_start
+          examples[current_section][example_count] ||= {
+            "line"     => line_number.to_s,
+            "markdown" => "",
+            "html"     => "",
+          } of String => String
+
+          examples[current_section][example_count]["markdown"] += line + "\n"
+        elsif !test_start && result_start
+          examples[current_section][example_count]["html"] += line + "\n"
+        end
+      end
+    end
+  end
+
+  # Remove empty examples
+  examples.keys.each { |k| examples.delete(k) if examples[k].empty? }
+  examples
+end
+
+# Makes whitespace visible in spec names: tabs become `→`, spaces become `␣`.
+def show_space(text)
+  text.gsub("\t", "→").gsub(/ /, '␣')
+end
diff --git a/lib/markd/src/markd.cr b/lib/markd/src/markd.cr
new file mode 100644
index 000000000000..c3ee65c58819
--- /dev/null
+++ b/lib/markd/src/markd.cr
@@ -0,0 +1,18 @@
+require "./markd/html_entities"
+require "./markd/utils"
+require "./markd/node"
+require "./markd/rule"
+require "./markd/options"
+require "./markd/renderer"
+require "./markd/parser"
+require "./markd/version"
+
+module Markd
+  def self.to_html(source : String, options = Options.new)
+    return "" if source.empty?
+
+    document = Parser.parse(source, options)
+    renderer = HTMLRenderer.new(options)
+    renderer.render(document)
+  end
+end
diff --git a/lib/markd/src/markd/html_entities.cr b/lib/markd/src/markd/html_entities.cr
new file mode 100644
index 000000000000..e5ae6460defb
--- /dev/null
+++ b/lib/markd/src/markd/html_entities.cr
@@ -0,0 +1,109 @@
+require "./mappings/*"
+
+# Decoding and encoding of HTML character references.
+module Markd::HTMLEntities
+  # Convenience methods that forward to `Decoder` and `Encoder`. The `HTML`
+  # module at the bottom of this file extends this module.
+  module ExtendToHTML
+    def decode_entities(source : String)
+      Decoder.decode(source)
+    end
+
+    def decode_entity(source : String)
+      Decoder.decode_entity(source)
+    end
+
+    def encode_entities(source)
+      Encoder.encode(source)
+    end
+  end
+
+  module Decoder
+    # Matches a named reference (terminating `;` required) or a numeric
+    # reference (terminating `;` optional).
+    REGEX = /&(?:([a-zA-Z0-9]{2,32};)|(#[xX][\da-fA-F]+;?|#\d+;?))/
+
+    # Replaces every character reference in *source* with its decoded text.
+    def self.decode(source)
+      source.gsub(REGEX) do |chars|
+        # Drop the leading `&` and the trailing `;` only when it is present:
+        # numeric references may omit it, and slicing off the last character
+        # unconditionally would cut a digit from e.g. `&#65`.
+        decode_entity(chars.lchop('&').rchop(';'))
+      end
+    end
+
+    # Decodes a single reference given without its `&` and `;` delimiters, e.g.
+    # "amp", "#38" or "#x26". Unknown names are returned in `&name;` form.
+    def self.decode_entity(chars)
+      if chars[0] == '#'
+        if chars.size > 1
+          if chars[1].downcase == 'x'
+            if chars.size > 2
+              return decode_codepoint(chars[2..-1].to_i(16))
+            end
+          else
+            return decode_codepoint(chars[1..-1].to_i(10))
+          end
+        end
+      elsif resolved_entity = Markd::HTMLEntities::ENTITIES_MAPPINGS[chars]?
+        return resolved_entity
+      end
+
+      "&#{chars};"
+    end
+
+    # Maps a numeric code point to its character. Surrogates and values beyond
+    # U+10FFFF yield U+FFFD; code points listed in `DECODE_MAPPINGS` are
+    # substituted with their mapped value first.
+    def self.decode_codepoint(codepoint)
+      return "\uFFFD" if codepoint >= 0xD800 && codepoint <= 0xDFFF || codepoint > 0x10FFFF
+
+      if decoded = Markd::HTMLEntities::DECODE_MAPPINGS[codepoint]?
+        codepoint = decoded
+      end
+
+      codepoint.unsafe_chr
+    end
+  end
+
+  module Encoder
+    ENTITIES_REGEX = Regex.union(HTMLEntities::ENTITIES_MAPPINGS.values)
+    ASTRAL_REGEX = Regex.new("[\xED\xA0\x80-\xED\xAF\xBF][\xED\xB0\x80-\xED\xBF\xBF]")
+    ENCODE_REGEX = /[^\x{20}-\x{7E}]/
+
+    # Escapes *source* in three passes: text that has a named entity, then
+    # surrogate pairs, then any character outside U+0020..U+007E as a
+    # hexadecimal numeric reference.
+    def self.encode(source : String)
+      source.gsub(ENTITIES_REGEX) { |chars| encode_entities(chars) }
+        .gsub(ASTRAL_REGEX) { |chars| encode_astral(chars) }
+        .gsub(ENCODE_REGEX) { |chars| encode_extend(chars) }
+    end
+
+    private def self.encode_entities(chars : String)
+      # `Hash#key_for` is the reverse lookup (value -> key).
+      entity = HTMLEntities::ENTITIES_MAPPINGS.key_for(chars)
+      "&#{entity};"
+    end
+
+    # Combines a surrogate pair into one code point and encodes it.
+    # NOTE(review): this and ASTRAL_REGEX look ported from UTF-16 based code;
+    # confirm the pattern can match Crystal's UTF-8 strings at all.
+    private def self.encode_astral(chars : String)
+      high = chars.char_at(0).ord
+      low = chars.char_at(1).ord
+      codepoint = (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
+
+      "&#x#{codepoint.to_s(16).upcase};"
+    end
+
+    private def self.encode_extend(char : String)
+      "&#x#{char[0].ord.to_s(16).upcase};"
+    end
+  end
+end
+
+module HTML
+  extend Markd::HTMLEntities::ExtendToHTML
+end
diff --git a/lib/markd/src/markd/mappings/decode.cr b/lib/markd/src/markd/mappings/decode.cr
new file mode 100644
index 000000000000..87450fb2a030
--- /dev/null
+++ b/lib/markd/src/markd/mappings/decode.cr
@@ -0,0 +1,34 @@
+module Markd::HTMLEntities
+  # Replacement code points for numeric references: 0 becomes U+FFFD and the
+  # listed values in 128..159 are remapped before conversion to a character.
+  DECODE_MAPPINGS = {
+    0   => 65533,
+    128 => 8364,
+    130 => 8218,
+    131 => 402,
+    132 => 8222,
+    133 => 8230,
+    134 => 8224,
+    135 => 8225,
+    136 => 710,
+    137 => 8240,
+    138 => 352,
+    139 => 8249,
+    140 => 338,
+    142 => 381,
+    145 => 8216,
+    146 => 8217,
+    147 => 8220,
+    148 => 8221,
+    149 => 8226,
+    150 => 8211,
+    151 => 8212,
+    152 => 732,
+    153 => 8482,
+    154 => 353,
+    155 => 8250,
+    156 => 339,
+    158 => 382,
+    159 => 376,
+  }
+end
diff --git a/lib/markd/src/markd/mappings/entities.cr b/lib/markd/src/markd/mappings/entities.cr
new file mode 100644
index 000000000000..762b6126d4d9
--- /dev/null
+++ b/lib/markd/src/markd/mappings/entities.cr
@@ -0,0 +1,2129 @@
+module Markd::HTMLEntities
+  ENTITIES_MAPPINGS = {
+    "Aacute" =>
"\u00C1", + "aacute" => "\u00E1", + "Abreve" => "\u0102", + "abreve" => "\u0103", + "ac" => "\u223E", + "acd" => "\u223F", + "acE" => "\u223E\u0333", + "Acirc" => "\u00C2", + "acirc" => "\u00E2", + "acute" => "\u00B4", + "Acy" => "\u0410", + "acy" => "\u0430", + "AElig" => "\u00C6", + "aelig" => "\u00E6", + "af" => "\u2061", + "Afr" => "\xED\xA0\xB5\xED\xB4\x84", + "afr" => "\xED\xA0\xB5\xED\xB4\x9E", + "Agrave" => "\u00C0", + "agrave" => "\u00E0", + "alefsym" => "\u2135", + "aleph" => "\u2135", + "Alpha" => "\u0391", + "alpha" => "\u03B1", + "Amacr" => "\u0100", + "amacr" => "\u0101", + "amalg" => "\u2A3F", + "amp" => "&", + "AMP" => "&", + "andand" => "\u2A55", + "And" => "\u2A53", + "and" => "\u2227", + "andd" => "\u2A5C", + "andslope" => "\u2A58", + "andv" => "\u2A5A", + "ang" => "\u2220", + "ange" => "\u29A4", + "angle" => "\u2220", + "angmsdaa" => "\u29A8", + "angmsdab" => "\u29A9", + "angmsdac" => "\u29AA", + "angmsdad" => "\u29AB", + "angmsdae" => "\u29AC", + "angmsdaf" => "\u29AD", + "angmsdag" => "\u29AE", + "angmsdah" => "\u29AF", + "angmsd" => "\u2221", + "angrt" => "\u221F", + "angrtvb" => "\u22BE", + "angrtvbd" => "\u299D", + "angsph" => "\u2222", + "angst" => "\u00C5", + "angzarr" => "\u237C", + "Aogon" => "\u0104", + "aogon" => "\u0105", + "Aopf" => "\xED\xA0\xB5\xED\xB4\xB8", + "aopf" => "\xED\xA0\xB5\xED\xB5\x92", + "apacir" => "\u2A6F", + "ap" => "\u2248", + "apE" => "\u2A70", + "ape" => "\u224A", + "apid" => "\u224B", + "apos" => "'", + "ApplyFunction" => "\u2061", + "approx" => "\u2248", + "approxeq" => "\u224A", + "Aring" => "\u00C5", + "aring" => "\u00E5", + "Ascr" => "\xED\xA0\xB5\xED\xB2\x9C", + "ascr" => "\xED\xA0\xB5\xED\xB2\xB6", + "Assign" => "\u2254", + "ast" => "*", + "asymp" => "\u2248", + "asympeq" => "\u224D", + "Atilde" => "\u00C3", + "atilde" => "\u00E3", + "Auml" => "\u00C4", + "auml" => "\u00E4", + "awconint" => "\u2233", + "awint" => "\u2A11", + "backcong" => "\u224C", + "backepsilon" => "\u03F6", + "backprime" => "\u2035", + 
"backsim" => "\u223D", + "backsimeq" => "\u22CD", + "Backslash" => "\u2216", + "Barv" => "\u2AE7", + "barvee" => "\u22BD", + "barwed" => "\u2305", + "Barwed" => "\u2306", + "barwedge" => "\u2305", + "bbrk" => "\u23B5", + "bbrktbrk" => "\u23B6", + "bcong" => "\u224C", + "Bcy" => "\u0411", + "bcy" => "\u0431", + "bdquo" => "\u201E", + "becaus" => "\u2235", + "because" => "\u2235", + "Because" => "\u2235", + "bemptyv" => "\u29B0", + "bepsi" => "\u03F6", + "bernou" => "\u212C", + "Bernoullis" => "\u212C", + "Beta" => "\u0392", + "beta" => "\u03B2", + "beth" => "\u2136", + "between" => "\u226C", + "Bfr" => "\xED\xA0\xB5\xED\xB4\x85", + "bfr" => "\xED\xA0\xB5\xED\xB4\x9F", + "bigcap" => "\u22C2", + "bigcirc" => "\u25EF", + "bigcup" => "\u22C3", + "bigodot" => "\u2A00", + "bigoplus" => "\u2A01", + "bigotimes" => "\u2A02", + "bigsqcup" => "\u2A06", + "bigstar" => "\u2605", + "bigtriangledown" => "\u25BD", + "bigtriangleup" => "\u25B3", + "biguplus" => "\u2A04", + "bigvee" => "\u22C1", + "bigwedge" => "\u22C0", + "bkarow" => "\u290D", + "blacklozenge" => "\u29EB", + "blacksquare" => "\u25AA", + "blacktriangle" => "\u25B4", + "blacktriangledown" => "\u25BE", + "blacktriangleleft" => "\u25C2", + "blacktriangleright" => "\u25B8", + "blank" => "\u2423", + "blk12" => "\u2592", + "blk14" => "\u2591", + "blk34" => "\u2593", + "block" => "\u2588", + "bne" => "=\u20E5", + "bnequiv" => "\u2261\u20E5", + "bNot" => "\u2AED", + "bnot" => "\u2310", + "Bopf" => "\xED\xA0\xB5\xED\xB4\xB9", + "bopf" => "\xED\xA0\xB5\xED\xB5\x93", + "bot" => "\u22A5", + "bottom" => "\u22A5", + "bowtie" => "\u22C8", + "boxbox" => "\u29C9", + "boxdl" => "\u2510", + "boxdL" => "\u2555", + "boxDl" => "\u2556", + "boxDL" => "\u2557", + "boxdr" => "\u250C", + "boxdR" => "\u2552", + "boxDr" => "\u2553", + "boxDR" => "\u2554", + "boxh" => "\u2500", + "boxH" => "\u2550", + "boxhd" => "\u252C", + "boxHd" => "\u2564", + "boxhD" => "\u2565", + "boxHD" => "\u2566", + "boxhu" => "\u2534", + "boxHu" => "\u2567", + "boxhU" 
=> "\u2568", + "boxHU" => "\u2569", + "boxminus" => "\u229F", + "boxplus" => "\u229E", + "boxtimes" => "\u22A0", + "boxul" => "\u2518", + "boxuL" => "\u255B", + "boxUl" => "\u255C", + "boxUL" => "\u255D", + "boxur" => "\u2514", + "boxuR" => "\u2558", + "boxUr" => "\u2559", + "boxUR" => "\u255A", + "boxv" => "\u2502", + "boxV" => "\u2551", + "boxvh" => "\u253C", + "boxvH" => "\u256A", + "boxVh" => "\u256B", + "boxVH" => "\u256C", + "boxvl" => "\u2524", + "boxvL" => "\u2561", + "boxVl" => "\u2562", + "boxVL" => "\u2563", + "boxvr" => "\u251C", + "boxvR" => "\u255E", + "boxVr" => "\u255F", + "boxVR" => "\u2560", + "bprime" => "\u2035", + "breve" => "\u02D8", + "Breve" => "\u02D8", + "brvbar" => "\u00A6", + "bscr" => "\xED\xA0\xB5\xED\xB2\xB7", + "Bscr" => "\u212C", + "bsemi" => "\u204F", + "bsim" => "\u223D", + "bsime" => "\u22CD", + "bsolb" => "\u29C5", + "bsol" => "\\", + "bsolhsub" => "\u27C8", + "bull" => "\u2022", + "bullet" => "\u2022", + "bump" => "\u224E", + "bumpE" => "\u2AAE", + "bumpe" => "\u224F", + "Bumpeq" => "\u224E", + "bumpeq" => "\u224F", + "Cacute" => "\u0106", + "cacute" => "\u0107", + "capand" => "\u2A44", + "capbrcup" => "\u2A49", + "capcap" => "\u2A4B", + "cap" => "\u2229", + "Cap" => "\u22D2", + "capcup" => "\u2A47", + "capdot" => "\u2A40", + "CapitalDifferentialD" => "\u2145", + "caps" => "\u2229\uFE00", + "caret" => "\u2041", + "caron" => "\u02C7", + "Cayleys" => "\u212D", + "ccaps" => "\u2A4D", + "Ccaron" => "\u010C", + "ccaron" => "\u010D", + "Ccedil" => "\u00C7", + "ccedil" => "\u00E7", + "Ccirc" => "\u0108", + "ccirc" => "\u0109", + "Cconint" => "\u2230", + "ccups" => "\u2A4C", + "ccupssm" => "\u2A50", + "Cdot" => "\u010A", + "cdot" => "\u010B", + "cedil" => "\u00B8", + "Cedilla" => "\u00B8", + "cemptyv" => "\u29B2", + "cent" => "\u00A2", + "centerdot" => "\u00B7", + "CenterDot" => "\u00B7", + "cfr" => "\xED\xA0\xB5\xED\xB4\xA0", + "Cfr" => "\u212D", + "CHcy" => "\u0427", + "chcy" => "\u0447", + "check" => "\u2713", + "checkmark" => 
"\u2713", + "Chi" => "\u03A7", + "chi" => "\u03C7", + "circ" => "\u02C6", + "circeq" => "\u2257", + "circlearrowleft" => "\u21BA", + "circlearrowright" => "\u21BB", + "circledast" => "\u229B", + "circledcirc" => "\u229A", + "circleddash" => "\u229D", + "CircleDot" => "\u2299", + "circledR" => "\u00AE", + "circledS" => "\u24C8", + "CircleMinus" => "\u2296", + "CirclePlus" => "\u2295", + "CircleTimes" => "\u2297", + "cir" => "\u25CB", + "cirE" => "\u29C3", + "cire" => "\u2257", + "cirfnint" => "\u2A10", + "cirmid" => "\u2AEF", + "cirscir" => "\u29C2", + "ClockwiseContourIntegral" => "\u2232", + "CloseCurlyDoubleQuote" => "\u201D", + "CloseCurlyQuote" => "\u2019", + "clubs" => "\u2663", + "clubsuit" => "\u2663", + "colon" => ":", + "Colon" => "\u2237", + "Colone" => "\u2A74", + "colone" => "\u2254", + "coloneq" => "\u2254", + "comma" => ",", + "commat" => "@", + "comp" => "\u2201", + "compfn" => "\u2218", + "complement" => "\u2201", + "complexes" => "\u2102", + "cong" => "\u2245", + "congdot" => "\u2A6D", + "Congruent" => "\u2261", + "conint" => "\u222E", + "Conint" => "\u222F", + "ContourIntegral" => "\u222E", + "copf" => "\xED\xA0\xB5\xED\xB5\x94", + "Copf" => "\u2102", + "coprod" => "\u2210", + "Coproduct" => "\u2210", + "copy" => "\u00A9", + "COPY" => "\u00A9", + "copysr" => "\u2117", + "CounterClockwiseContourIntegral" => "\u2233", + "crarr" => "\u21B5", + "cross" => "\u2717", + "Cross" => "\u2A2F", + "Cscr" => "\xED\xA0\xB5\xED\xB2\x9E", + "cscr" => "\xED\xA0\xB5\xED\xB2\xB8", + "csub" => "\u2ACF", + "csube" => "\u2AD1", + "csup" => "\u2AD0", + "csupe" => "\u2AD2", + "ctdot" => "\u22EF", + "cudarrl" => "\u2938", + "cudarrr" => "\u2935", + "cuepr" => "\u22DE", + "cuesc" => "\u22DF", + "cularr" => "\u21B6", + "cularrp" => "\u293D", + "cupbrcap" => "\u2A48", + "cupcap" => "\u2A46", + "CupCap" => "\u224D", + "cup" => "\u222A", + "Cup" => "\u22D3", + "cupcup" => "\u2A4A", + "cupdot" => "\u228D", + "cupor" => "\u2A45", + "cups" => "\u222A\uFE00", + "curarr" => 
"\u21B7", + "curarrm" => "\u293C", + "curlyeqprec" => "\u22DE", + "curlyeqsucc" => "\u22DF", + "curlyvee" => "\u22CE", + "curlywedge" => "\u22CF", + "curren" => "\u00A4", + "curvearrowleft" => "\u21B6", + "curvearrowright" => "\u21B7", + "cuvee" => "\u22CE", + "cuwed" => "\u22CF", + "cwconint" => "\u2232", + "cwint" => "\u2231", + "cylcty" => "\u232D", + "dagger" => "\u2020", + "Dagger" => "\u2021", + "daleth" => "\u2138", + "darr" => "\u2193", + "Darr" => "\u21A1", + "dArr" => "\u21D3", + "dash" => "\u2010", + "Dashv" => "\u2AE4", + "dashv" => "\u22A3", + "dbkarow" => "\u290F", + "dblac" => "\u02DD", + "Dcaron" => "\u010E", + "dcaron" => "\u010F", + "Dcy" => "\u0414", + "dcy" => "\u0434", + "ddagger" => "\u2021", + "ddarr" => "\u21CA", + "DD" => "\u2145", + "dd" => "\u2146", + "DDotrahd" => "\u2911", + "ddotseq" => "\u2A77", + "deg" => "\u00B0", + "Del" => "\u2207", + "Delta" => "\u0394", + "delta" => "\u03B4", + "demptyv" => "\u29B1", + "dfisht" => "\u297F", + "Dfr" => "\xED\xA0\xB5\xED\xB4\x87", + "dfr" => "\xED\xA0\xB5\xED\xB4\xA1", + "dHar" => "\u2965", + "dharl" => "\u21C3", + "dharr" => "\u21C2", + "DiacriticalAcute" => "\u00B4", + "DiacriticalDot" => "\u02D9", + "DiacriticalDoubleAcute" => "\u02DD", + "DiacriticalGrave" => "`", + "DiacriticalTilde" => "\u02DC", + "diam" => "\u22C4", + "diamond" => "\u22C4", + "Diamond" => "\u22C4", + "diamondsuit" => "\u2666", + "diams" => "\u2666", + "die" => "\u00A8", + "DifferentialD" => "\u2146", + "digamma" => "\u03DD", + "disin" => "\u22F2", + "div" => "\u00F7", + "divide" => "\u00F7", + "divideontimes" => "\u22C7", + "divonx" => "\u22C7", + "DJcy" => "\u0402", + "djcy" => "\u0452", + "dlcorn" => "\u231E", + "dlcrop" => "\u230D", + "dollar" => "$", + "Dopf" => "\xED\xA0\xB5\xED\xB4\xBB", + "dopf" => "\xED\xA0\xB5\xED\xB5\x95", + "Dot" => "\u00A8", + "dot" => "\u02D9", + "DotDot" => "\u20DC", + "doteq" => "\u2250", + "doteqdot" => "\u2251", + "DotEqual" => "\u2250", + "dotminus" => "\u2238", + "dotplus" => "\u2214", + 
"dotsquare" => "\u22A1", + "doublebarwedge" => "\u2306", + "DoubleContourIntegral" => "\u222F", + "DoubleDot" => "\u00A8", + "DoubleDownArrow" => "\u21D3", + "DoubleLeftArrow" => "\u21D0", + "DoubleLeftRightArrow" => "\u21D4", + "DoubleLeftTee" => "\u2AE4", + "DoubleLongLeftArrow" => "\u27F8", + "DoubleLongLeftRightArrow" => "\u27FA", + "DoubleLongRightArrow" => "\u27F9", + "DoubleRightArrow" => "\u21D2", + "DoubleRightTee" => "\u22A8", + "DoubleUpArrow" => "\u21D1", + "DoubleUpDownArrow" => "\u21D5", + "DoubleVerticalBar" => "\u2225", + "DownArrowBar" => "\u2913", + "downarrow" => "\u2193", + "DownArrow" => "\u2193", + "Downarrow" => "\u21D3", + "DownArrowUpArrow" => "\u21F5", + "DownBreve" => "\u0311", + "downdownarrows" => "\u21CA", + "downharpoonleft" => "\u21C3", + "downharpoonright" => "\u21C2", + "DownLeftRightVector" => "\u2950", + "DownLeftTeeVector" => "\u295E", + "DownLeftVectorBar" => "\u2956", + "DownLeftVector" => "\u21BD", + "DownRightTeeVector" => "\u295F", + "DownRightVectorBar" => "\u2957", + "DownRightVector" => "\u21C1", + "DownTeeArrow" => "\u21A7", + "DownTee" => "\u22A4", + "drbkarow" => "\u2910", + "drcorn" => "\u231F", + "drcrop" => "\u230C", + "Dscr" => "\xED\xA0\xB5\xED\xB2\x9F", + "dscr" => "\xED\xA0\xB5\xED\xB2\xB9", + "DScy" => "\u0405", + "dscy" => "\u0455", + "dsol" => "\u29F6", + "Dstrok" => "\u0110", + "dstrok" => "\u0111", + "dtdot" => "\u22F1", + "dtri" => "\u25BF", + "dtrif" => "\u25BE", + "duarr" => "\u21F5", + "duhar" => "\u296F", + "dwangle" => "\u29A6", + "DZcy" => "\u040F", + "dzcy" => "\u045F", + "dzigrarr" => "\u27FF", + "Eacute" => "\u00C9", + "eacute" => "\u00E9", + "easter" => "\u2A6E", + "Ecaron" => "\u011A", + "ecaron" => "\u011B", + "Ecirc" => "\u00CA", + "ecirc" => "\u00EA", + "ecir" => "\u2256", + "ecolon" => "\u2255", + "Ecy" => "\u042D", + "ecy" => "\u044D", + "eDDot" => "\u2A77", + "Edot" => "\u0116", + "edot" => "\u0117", + "eDot" => "\u2251", + "ee" => "\u2147", + "efDot" => "\u2252", + "Efr" => 
"\xED\xA0\xB5\xED\xB4\x88", + "efr" => "\xED\xA0\xB5\xED\xB4\xA2", + "eg" => "\u2A9A", + "Egrave" => "\u00C8", + "egrave" => "\u00E8", + "egs" => "\u2A96", + "egsdot" => "\u2A98", + "el" => "\u2A99", + "Element" => "\u2208", + "elinters" => "\u23E7", + "ell" => "\u2113", + "els" => "\u2A95", + "elsdot" => "\u2A97", + "Emacr" => "\u0112", + "emacr" => "\u0113", + "empty" => "\u2205", + "emptyset" => "\u2205", + "EmptySmallSquare" => "\u25FB", + "emptyv" => "\u2205", + "EmptyVerySmallSquare" => "\u25AB", + "emsp13" => "\u2004", + "emsp14" => "\u2005", + "emsp" => "\u2003", + "ENG" => "\u014A", + "eng" => "\u014B", + "ensp" => "\u2002", + "Eogon" => "\u0118", + "eogon" => "\u0119", + "Eopf" => "\xED\xA0\xB5\xED\xB4\xBC", + "eopf" => "\xED\xA0\xB5\xED\xB5\x96", + "epar" => "\u22D5", + "eparsl" => "\u29E3", + "eplus" => "\u2A71", + "epsi" => "\u03B5", + "Epsilon" => "\u0395", + "epsilon" => "\u03B5", + "epsiv" => "\u03F5", + "eqcirc" => "\u2256", + "eqcolon" => "\u2255", + "eqsim" => "\u2242", + "eqslantgtr" => "\u2A96", + "eqslantless" => "\u2A95", + "Equal" => "\u2A75", + "equals" => "=", + "EqualTilde" => "\u2242", + "equest" => "\u225F", + "Equilibrium" => "\u21CC", + "equiv" => "\u2261", + "equivDD" => "\u2A78", + "eqvparsl" => "\u29E5", + "erarr" => "\u2971", + "erDot" => "\u2253", + "escr" => "\u212F", + "Escr" => "\u2130", + "esdot" => "\u2250", + "Esim" => "\u2A73", + "esim" => "\u2242", + "Eta" => "\u0397", + "eta" => "\u03B7", + "ETH" => "\u00D0", + "eth" => "\u00F0", + "Euml" => "\u00CB", + "euml" => "\u00EB", + "euro" => "\u20AC", + "excl" => "!", + "exist" => "\u2203", + "Exists" => "\u2203", + "expectation" => "\u2130", + "exponentiale" => "\u2147", + "ExponentialE" => "\u2147", + "fallingdotseq" => "\u2252", + "Fcy" => "\u0424", + "fcy" => "\u0444", + "female" => "\u2640", + "ffilig" => "\uFB03", + "fflig" => "\uFB00", + "ffllig" => "\uFB04", + "Ffr" => "\xED\xA0\xB5\xED\xB4\x89", + "ffr" => "\xED\xA0\xB5\xED\xB4\xA3", + "filig" => "\uFB01", + 
"FilledSmallSquare" => "\u25FC", + "FilledVerySmallSquare" => "\u25AA", + "fjlig" => "fj", + "flat" => "\u266D", + "fllig" => "\uFB02", + "fltns" => "\u25B1", + "fnof" => "\u0192", + "Fopf" => "\xED\xA0\xB5\xED\xB4\xBD", + "fopf" => "\xED\xA0\xB5\xED\xB5\x97", + "forall" => "\u2200", + "ForAll" => "\u2200", + "fork" => "\u22D4", + "forkv" => "\u2AD9", + "Fouriertrf" => "\u2131", + "fpartint" => "\u2A0D", + "frac12" => "\u00BD", + "frac13" => "\u2153", + "frac14" => "\u00BC", + "frac15" => "\u2155", + "frac16" => "\u2159", + "frac18" => "\u215B", + "frac23" => "\u2154", + "frac25" => "\u2156", + "frac34" => "\u00BE", + "frac35" => "\u2157", + "frac38" => "\u215C", + "frac45" => "\u2158", + "frac56" => "\u215A", + "frac58" => "\u215D", + "frac78" => "\u215E", + "frasl" => "\u2044", + "frown" => "\u2322", + "fscr" => "\xED\xA0\xB5\xED\xB2\xBB", + "Fscr" => "\u2131", + "gacute" => "\u01F5", + "Gamma" => "\u0393", + "gamma" => "\u03B3", + "Gammad" => "\u03DC", + "gammad" => "\u03DD", + "gap" => "\u2A86", + "Gbreve" => "\u011E", + "gbreve" => "\u011F", + "Gcedil" => "\u0122", + "Gcirc" => "\u011C", + "gcirc" => "\u011D", + "Gcy" => "\u0413", + "gcy" => "\u0433", + "Gdot" => "\u0120", + "gdot" => "\u0121", + "ge" => "\u2265", + "gE" => "\u2267", + "gEl" => "\u2A8C", + "gel" => "\u22DB", + "geq" => "\u2265", + "geqq" => "\u2267", + "geqslant" => "\u2A7E", + "gescc" => "\u2AA9", + "ges" => "\u2A7E", + "gesdot" => "\u2A80", + "gesdoto" => "\u2A82", + "gesdotol" => "\u2A84", + "gesl" => "\u22DB\uFE00", + "gesles" => "\u2A94", + "Gfr" => "\xED\xA0\xB5\xED\xB4\x8A", + "gfr" => "\xED\xA0\xB5\xED\xB4\xA4", + "gg" => "\u226B", + "Gg" => "\u22D9", + "ggg" => "\u22D9", + "gimel" => "\u2137", + "GJcy" => "\u0403", + "gjcy" => "\u0453", + "gla" => "\u2AA5", + "gl" => "\u2277", + "glE" => "\u2A92", + "glj" => "\u2AA4", + "gnap" => "\u2A8A", + "gnapprox" => "\u2A8A", + "gne" => "\u2A88", + "gnE" => "\u2269", + "gneq" => "\u2A88", + "gneqq" => "\u2269", + "gnsim" => "\u22E7", + "Gopf" => 
"\xED\xA0\xB5\xED\xB4\xBE", + "gopf" => "\xED\xA0\xB5\xED\xB5\x98", + "grave" => "`", + "GreaterEqual" => "\u2265", + "GreaterEqualLess" => "\u22DB", + "GreaterFullEqual" => "\u2267", + "GreaterGreater" => "\u2AA2", + "GreaterLess" => "\u2277", + "GreaterSlantEqual" => "\u2A7E", + "GreaterTilde" => "\u2273", + "Gscr" => "\xED\xA0\xB5\xED\xB2\xA2", + "gscr" => "\u210A", + "gsim" => "\u2273", + "gsime" => "\u2A8E", + "gsiml" => "\u2A90", + "gtcc" => "\u2AA7", + "gtcir" => "\u2A7A", + "gt" => ">", + "GT" => ">", + "Gt" => "\u226B", + "gtdot" => "\u22D7", + "gtlPar" => "\u2995", + "gtquest" => "\u2A7C", + "gtrapprox" => "\u2A86", + "gtrarr" => "\u2978", + "gtrdot" => "\u22D7", + "gtreqless" => "\u22DB", + "gtreqqless" => "\u2A8C", + "gtrless" => "\u2277", + "gtrsim" => "\u2273", + "gvertneqq" => "\u2269\uFE00", + "gvnE" => "\u2269\uFE00", + "Hacek" => "\u02C7", + "hairsp" => "\u200A", + "half" => "\u00BD", + "hamilt" => "\u210B", + "HARDcy" => "\u042A", + "hardcy" => "\u044A", + "harrcir" => "\u2948", + "harr" => "\u2194", + "hArr" => "\u21D4", + "harrw" => "\u21AD", + "Hat" => "^", + "hbar" => "\u210F", + "Hcirc" => "\u0124", + "hcirc" => "\u0125", + "hearts" => "\u2665", + "heartsuit" => "\u2665", + "hellip" => "\u2026", + "hercon" => "\u22B9", + "hfr" => "\xED\xA0\xB5\xED\xB4\xA5", + "Hfr" => "\u210C", + "HilbertSpace" => "\u210B", + "hksearow" => "\u2925", + "hkswarow" => "\u2926", + "hoarr" => "\u21FF", + "homtht" => "\u223B", + "hookleftarrow" => "\u21A9", + "hookrightarrow" => "\u21AA", + "hopf" => "\xED\xA0\xB5\xED\xB5\x99", + "Hopf" => "\u210D", + "horbar" => "\u2015", + "HorizontalLine" => "\u2500", + "hscr" => "\xED\xA0\xB5\xED\xB2\xBD", + "Hscr" => "\u210B", + "hslash" => "\u210F", + "Hstrok" => "\u0126", + "hstrok" => "\u0127", + "HumpDownHump" => "\u224E", + "HumpEqual" => "\u224F", + "hybull" => "\u2043", + "hyphen" => "\u2010", + "Iacute" => "\u00CD", + "iacute" => "\u00ED", + "ic" => "\u2063", + "Icirc" => "\u00CE", + "icirc" => "\u00EE", + "Icy" => 
"\u0418", + "icy" => "\u0438", + "Idot" => "\u0130", + "IEcy" => "\u0415", + "iecy" => "\u0435", + "iexcl" => "\u00A1", + "iff" => "\u21D4", + "ifr" => "\xED\xA0\xB5\xED\xB4\xA6", + "Ifr" => "\u2111", + "Igrave" => "\u00CC", + "igrave" => "\u00EC", + "ii" => "\u2148", + "iiiint" => "\u2A0C", + "iiint" => "\u222D", + "iinfin" => "\u29DC", + "iiota" => "\u2129", + "IJlig" => "\u0132", + "ijlig" => "\u0133", + "Imacr" => "\u012A", + "imacr" => "\u012B", + "image" => "\u2111", + "ImaginaryI" => "\u2148", + "imagline" => "\u2110", + "imagpart" => "\u2111", + "imath" => "\u0131", + "Im" => "\u2111", + "imof" => "\u22B7", + "imped" => "\u01B5", + "Implies" => "\u21D2", + "incare" => "\u2105", + "in" => "\u2208", + "infin" => "\u221E", + "infintie" => "\u29DD", + "inodot" => "\u0131", + "intcal" => "\u22BA", + "int" => "\u222B", + "Int" => "\u222C", + "integers" => "\u2124", + "Integral" => "\u222B", + "intercal" => "\u22BA", + "Intersection" => "\u22C2", + "intlarhk" => "\u2A17", + "intprod" => "\u2A3C", + "InvisibleComma" => "\u2063", + "InvisibleTimes" => "\u2062", + "IOcy" => "\u0401", + "iocy" => "\u0451", + "Iogon" => "\u012E", + "iogon" => "\u012F", + "Iopf" => "\xED\xA0\xB5\xED\xB5\x80", + "iopf" => "\xED\xA0\xB5\xED\xB5\x9A", + "Iota" => "\u0399", + "iota" => "\u03B9", + "iprod" => "\u2A3C", + "iquest" => "\u00BF", + "iscr" => "\xED\xA0\xB5\xED\xB2\xBE", + "Iscr" => "\u2110", + "isin" => "\u2208", + "isindot" => "\u22F5", + "isinE" => "\u22F9", + "isins" => "\u22F4", + "isinsv" => "\u22F3", + "isinv" => "\u2208", + "it" => "\u2062", + "Itilde" => "\u0128", + "itilde" => "\u0129", + "Iukcy" => "\u0406", + "iukcy" => "\u0456", + "Iuml" => "\u00CF", + "iuml" => "\u00EF", + "Jcirc" => "\u0134", + "jcirc" => "\u0135", + "Jcy" => "\u0419", + "jcy" => "\u0439", + "Jfr" => "\xED\xA0\xB5\xED\xB4\x8D", + "jfr" => "\xED\xA0\xB5\xED\xB4\xA7", + "jmath" => "\u0237", + "Jopf" => "\xED\xA0\xB5\xED\xB5\x81", + "jopf" => "\xED\xA0\xB5\xED\xB5\x9B", + "Jscr" => 
"\xED\xA0\xB5\xED\xB2\xA5", + "jscr" => "\xED\xA0\xB5\xED\xB2\xBF", + "Jsercy" => "\u0408", + "jsercy" => "\u0458", + "Jukcy" => "\u0404", + "jukcy" => "\u0454", + "Kappa" => "\u039A", + "kappa" => "\u03BA", + "kappav" => "\u03F0", + "Kcedil" => "\u0136", + "kcedil" => "\u0137", + "Kcy" => "\u041A", + "kcy" => "\u043A", + "Kfr" => "\xED\xA0\xB5\xED\xB4\x8E", + "kfr" => "\xED\xA0\xB5\xED\xB4\xA8", + "kgreen" => "\u0138", + "KHcy" => "\u0425", + "khcy" => "\u0445", + "KJcy" => "\u040C", + "kjcy" => "\u045C", + "Kopf" => "\xED\xA0\xB5\xED\xB5\x82", + "kopf" => "\xED\xA0\xB5\xED\xB5\x9C", + "Kscr" => "\xED\xA0\xB5\xED\xB2\xA6", + "kscr" => "\xED\xA0\xB5\xED\xB3\x80", + "lAarr" => "\u21DA", + "Lacute" => "\u0139", + "lacute" => "\u013A", + "laemptyv" => "\u29B4", + "lagran" => "\u2112", + "Lambda" => "\u039B", + "lambda" => "\u03BB", + "lang" => "\u27E8", + "Lang" => "\u27EA", + "langd" => "\u2991", + "langle" => "\u27E8", + "lap" => "\u2A85", + "Laplacetrf" => "\u2112", + "laquo" => "\u00AB", + "larrb" => "\u21E4", + "larrbfs" => "\u291F", + "larr" => "\u2190", + "Larr" => "\u219E", + "lArr" => "\u21D0", + "larrfs" => "\u291D", + "larrhk" => "\u21A9", + "larrlp" => "\u21AB", + "larrpl" => "\u2939", + "larrsim" => "\u2973", + "larrtl" => "\u21A2", + "latail" => "\u2919", + "lAtail" => "\u291B", + "lat" => "\u2AAB", + "late" => "\u2AAD", + "lates" => "\u2AAD\uFE00", + "lbarr" => "\u290C", + "lBarr" => "\u290E", + "lbbrk" => "\u2772", + "lbrace" => "{", + "lbrack" => "[", + "lbrke" => "\u298B", + "lbrksld" => "\u298F", + "lbrkslu" => "\u298D", + "Lcaron" => "\u013D", + "lcaron" => "\u013E", + "Lcedil" => "\u013B", + "lcedil" => "\u013C", + "lceil" => "\u2308", + "lcub" => "{", + "Lcy" => "\u041B", + "lcy" => "\u043B", + "ldca" => "\u2936", + "ldquo" => "\u201C", + "ldquor" => "\u201E", + "ldrdhar" => "\u2967", + "ldrushar" => "\u294B", + "ldsh" => "\u21B2", + "le" => "\u2264", + "lE" => "\u2266", + "LeftAngleBracket" => "\u27E8", + "LeftArrowBar" => "\u21E4", + 
"leftarrow" => "\u2190", + "LeftArrow" => "\u2190", + "Leftarrow" => "\u21D0", + "LeftArrowRightArrow" => "\u21C6", + "leftarrowtail" => "\u21A2", + "LeftCeiling" => "\u2308", + "LeftDoubleBracket" => "\u27E6", + "LeftDownTeeVector" => "\u2961", + "LeftDownVectorBar" => "\u2959", + "LeftDownVector" => "\u21C3", + "LeftFloor" => "\u230A", + "leftharpoondown" => "\u21BD", + "leftharpoonup" => "\u21BC", + "leftleftarrows" => "\u21C7", + "leftrightarrow" => "\u2194", + "LeftRightArrow" => "\u2194", + "Leftrightarrow" => "\u21D4", + "leftrightarrows" => "\u21C6", + "leftrightharpoons" => "\u21CB", + "leftrightsquigarrow" => "\u21AD", + "LeftRightVector" => "\u294E", + "LeftTeeArrow" => "\u21A4", + "LeftTee" => "\u22A3", + "LeftTeeVector" => "\u295A", + "leftthreetimes" => "\u22CB", + "LeftTriangleBar" => "\u29CF", + "LeftTriangle" => "\u22B2", + "LeftTriangleEqual" => "\u22B4", + "LeftUpDownVector" => "\u2951", + "LeftUpTeeVector" => "\u2960", + "LeftUpVectorBar" => "\u2958", + "LeftUpVector" => "\u21BF", + "LeftVectorBar" => "\u2952", + "LeftVector" => "\u21BC", + "lEg" => "\u2A8B", + "leg" => "\u22DA", + "leq" => "\u2264", + "leqq" => "\u2266", + "leqslant" => "\u2A7D", + "lescc" => "\u2AA8", + "les" => "\u2A7D", + "lesdot" => "\u2A7F", + "lesdoto" => "\u2A81", + "lesdotor" => "\u2A83", + "lesg" => "\u22DA\uFE00", + "lesges" => "\u2A93", + "lessapprox" => "\u2A85", + "lessdot" => "\u22D6", + "lesseqgtr" => "\u22DA", + "lesseqqgtr" => "\u2A8B", + "LessEqualGreater" => "\u22DA", + "LessFullEqual" => "\u2266", + "LessGreater" => "\u2276", + "lessgtr" => "\u2276", + "LessLess" => "\u2AA1", + "lesssim" => "\u2272", + "LessSlantEqual" => "\u2A7D", + "LessTilde" => "\u2272", + "lfisht" => "\u297C", + "lfloor" => "\u230A", + "Lfr" => "\xED\xA0\xB5\xED\xB4\x8F", + "lfr" => "\xED\xA0\xB5\xED\xB4\xA9", + "lg" => "\u2276", + "lgE" => "\u2A91", + "lHar" => "\u2962", + "lhard" => "\u21BD", + "lharu" => "\u21BC", + "lharul" => "\u296A", + "lhblk" => "\u2584", + "LJcy" => "\u0409", + 
"ljcy" => "\u0459", + "llarr" => "\u21C7", + "ll" => "\u226A", + "Ll" => "\u22D8", + "llcorner" => "\u231E", + "Lleftarrow" => "\u21DA", + "llhard" => "\u296B", + "lltri" => "\u25FA", + "Lmidot" => "\u013F", + "lmidot" => "\u0140", + "lmoustache" => "\u23B0", + "lmoust" => "\u23B0", + "lnap" => "\u2A89", + "lnapprox" => "\u2A89", + "lne" => "\u2A87", + "lnE" => "\u2268", + "lneq" => "\u2A87", + "lneqq" => "\u2268", + "lnsim" => "\u22E6", + "loang" => "\u27EC", + "loarr" => "\u21FD", + "lobrk" => "\u27E6", + "longleftarrow" => "\u27F5", + "LongLeftArrow" => "\u27F5", + "Longleftarrow" => "\u27F8", + "longleftrightarrow" => "\u27F7", + "LongLeftRightArrow" => "\u27F7", + "Longleftrightarrow" => "\u27FA", + "longmapsto" => "\u27FC", + "longrightarrow" => "\u27F6", + "LongRightArrow" => "\u27F6", + "Longrightarrow" => "\u27F9", + "looparrowleft" => "\u21AB", + "looparrowright" => "\u21AC", + "lopar" => "\u2985", + "Lopf" => "\xED\xA0\xB5\xED\xB5\x83", + "lopf" => "\xED\xA0\xB5\xED\xB5\x9D", + "loplus" => "\u2A2D", + "lotimes" => "\u2A34", + "lowast" => "\u2217", + "lowbar" => "_", + "LowerLeftArrow" => "\u2199", + "LowerRightArrow" => "\u2198", + "loz" => "\u25CA", + "lozenge" => "\u25CA", + "lozf" => "\u29EB", + "lpar" => "(", + "lparlt" => "\u2993", + "lrarr" => "\u21C6", + "lrcorner" => "\u231F", + "lrhar" => "\u21CB", + "lrhard" => "\u296D", + "lrm" => "\u200E", + "lrtri" => "\u22BF", + "lsaquo" => "\u2039", + "lscr" => "\xED\xA0\xB5\xED\xB3\x81", + "Lscr" => "\u2112", + "lsh" => "\u21B0", + "Lsh" => "\u21B0", + "lsim" => "\u2272", + "lsime" => "\u2A8D", + "lsimg" => "\u2A8F", + "lsqb" => "[", + "lsquo" => "\u2018", + "lsquor" => "\u201A", + "Lstrok" => "\u0141", + "lstrok" => "\u0142", + "ltcc" => "\u2AA6", + "ltcir" => "\u2A79", + "lt" => "<", + "LT" => "<", + "Lt" => "\u226A", + "ltdot" => "\u22D6", + "lthree" => "\u22CB", + "ltimes" => "\u22C9", + "ltlarr" => "\u2976", + "ltquest" => "\u2A7B", + "ltri" => "\u25C3", + "ltrie" => "\u22B4", + "ltrif" => "\u25C2", 
+ "ltrPar" => "\u2996", + "lurdshar" => "\u294A", + "luruhar" => "\u2966", + "lvertneqq" => "\u2268\uFE00", + "lvnE" => "\u2268\uFE00", + "macr" => "\u00AF", + "male" => "\u2642", + "malt" => "\u2720", + "maltese" => "\u2720", + "Map" => "\u2905", + "map" => "\u21A6", + "mapsto" => "\u21A6", + "mapstodown" => "\u21A7", + "mapstoleft" => "\u21A4", + "mapstoup" => "\u21A5", + "marker" => "\u25AE", + "mcomma" => "\u2A29", + "Mcy" => "\u041C", + "mcy" => "\u043C", + "mdash" => "\u2014", + "mDDot" => "\u223A", + "measuredangle" => "\u2221", + "MediumSpace" => "\u205F", + "Mellintrf" => "\u2133", + "Mfr" => "\xED\xA0\xB5\xED\xB4\x90", + "mfr" => "\xED\xA0\xB5\xED\xB4\xAA", + "mho" => "\u2127", + "micro" => "\u00B5", + "midast" => "*", + "midcir" => "\u2AF0", + "mid" => "\u2223", + "middot" => "\u00B7", + "minusb" => "\u229F", + "minus" => "\u2212", + "minusd" => "\u2238", + "minusdu" => "\u2A2A", + "MinusPlus" => "\u2213", + "mlcp" => "\u2ADB", + "mldr" => "\u2026", + "mnplus" => "\u2213", + "models" => "\u22A7", + "Mopf" => "\xED\xA0\xB5\xED\xB5\x84", + "mopf" => "\xED\xA0\xB5\xED\xB5\x9E", + "mp" => "\u2213", + "mscr" => "\xED\xA0\xB5\xED\xB3\x82", + "Mscr" => "\u2133", + "mstpos" => "\u223E", + "Mu" => "\u039C", + "mu" => "\u03BC", + "multimap" => "\u22B8", + "mumap" => "\u22B8", + "nabla" => "\u2207", + "Nacute" => "\u0143", + "nacute" => "\u0144", + "nang" => "\u2220\u20D2", + "nap" => "\u2249", + "napE" => "\u2A70\u0338", + "napid" => "\u224B\u0338", + "napos" => "\u0149", + "napprox" => "\u2249", + "natural" => "\u266E", + "naturals" => "\u2115", + "natur" => "\u266E", + "nbsp" => "\u00A0", + "nbump" => "\u224E\u0338", + "nbumpe" => "\u224F\u0338", + "ncap" => "\u2A43", + "Ncaron" => "\u0147", + "ncaron" => "\u0148", + "Ncedil" => "\u0145", + "ncedil" => "\u0146", + "ncong" => "\u2247", + "ncongdot" => "\u2A6D\u0338", + "ncup" => "\u2A42", + "Ncy" => "\u041D", + "ncy" => "\u043D", + "ndash" => "\u2013", + "nearhk" => "\u2924", + "nearr" => "\u2197", + "neArr" => 
"\u21D7", + "nearrow" => "\u2197", + "ne" => "\u2260", + "nedot" => "\u2250\u0338", + "NegativeMediumSpace" => "\u200B", + "NegativeThickSpace" => "\u200B", + "NegativeThinSpace" => "\u200B", + "NegativeVeryThinSpace" => "\u200B", + "nequiv" => "\u2262", + "nesear" => "\u2928", + "nesim" => "\u2242\u0338", + "NestedGreaterGreater" => "\u226B", + "NestedLessLess" => "\u226A", + "NewLine" => "\n", + "nexist" => "\u2204", + "nexists" => "\u2204", + "Nfr" => "\xED\xA0\xB5\xED\xB4\x91", + "nfr" => "\xED\xA0\xB5\xED\xB4\xAB", + "ngE" => "\u2267\u0338", + "nge" => "\u2271", + "ngeq" => "\u2271", + "ngeqq" => "\u2267\u0338", + "ngeqslant" => "\u2A7E\u0338", + "nges" => "\u2A7E\u0338", + "nGg" => "\u22D9\u0338", + "ngsim" => "\u2275", + "nGt" => "\u226B\u20D2", + "ngt" => "\u226F", + "ngtr" => "\u226F", + "nGtv" => "\u226B\u0338", + "nharr" => "\u21AE", + "nhArr" => "\u21CE", + "nhpar" => "\u2AF2", + "ni" => "\u220B", + "nis" => "\u22FC", + "nisd" => "\u22FA", + "niv" => "\u220B", + "NJcy" => "\u040A", + "njcy" => "\u045A", + "nlarr" => "\u219A", + "nlArr" => "\u21CD", + "nldr" => "\u2025", + "nlE" => "\u2266\u0338", + "nle" => "\u2270", + "nleftarrow" => "\u219A", + "nLeftarrow" => "\u21CD", + "nleftrightarrow" => "\u21AE", + "nLeftrightarrow" => "\u21CE", + "nleq" => "\u2270", + "nleqq" => "\u2266\u0338", + "nleqslant" => "\u2A7D\u0338", + "nles" => "\u2A7D\u0338", + "nless" => "\u226E", + "nLl" => "\u22D8\u0338", + "nlsim" => "\u2274", + "nLt" => "\u226A\u20D2", + "nlt" => "\u226E", + "nltri" => "\u22EA", + "nltrie" => "\u22EC", + "nLtv" => "\u226A\u0338", + "nmid" => "\u2224", + "NoBreak" => "\u2060", + "NonBreakingSpace" => "\u00A0", + "nopf" => "\xED\xA0\xB5\xED\xB5\x9F", + "Nopf" => "\u2115", + "Not" => "\u2AEC", + "not" => "\u00AC", + "NotCongruent" => "\u2262", + "NotCupCap" => "\u226D", + "NotDoubleVerticalBar" => "\u2226", + "NotElement" => "\u2209", + "NotEqual" => "\u2260", + "NotEqualTilde" => "\u2242\u0338", + "NotExists" => "\u2204", + "NotGreater" => 
"\u226F", + "NotGreaterEqual" => "\u2271", + "NotGreaterFullEqual" => "\u2267\u0338", + "NotGreaterGreater" => "\u226B\u0338", + "NotGreaterLess" => "\u2279", + "NotGreaterSlantEqual" => "\u2A7E\u0338", + "NotGreaterTilde" => "\u2275", + "NotHumpDownHump" => "\u224E\u0338", + "NotHumpEqual" => "\u224F\u0338", + "notin" => "\u2209", + "notindot" => "\u22F5\u0338", + "notinE" => "\u22F9\u0338", + "notinva" => "\u2209", + "notinvb" => "\u22F7", + "notinvc" => "\u22F6", + "NotLeftTriangleBar" => "\u29CF\u0338", + "NotLeftTriangle" => "\u22EA", + "NotLeftTriangleEqual" => "\u22EC", + "NotLess" => "\u226E", + "NotLessEqual" => "\u2270", + "NotLessGreater" => "\u2278", + "NotLessLess" => "\u226A\u0338", + "NotLessSlantEqual" => "\u2A7D\u0338", + "NotLessTilde" => "\u2274", + "NotNestedGreaterGreater" => "\u2AA2\u0338", + "NotNestedLessLess" => "\u2AA1\u0338", + "notni" => "\u220C", + "notniva" => "\u220C", + "notnivb" => "\u22FE", + "notnivc" => "\u22FD", + "NotPrecedes" => "\u2280", + "NotPrecedesEqual" => "\u2AAF\u0338", + "NotPrecedesSlantEqual" => "\u22E0", + "NotReverseElement" => "\u220C", + "NotRightTriangleBar" => "\u29D0\u0338", + "NotRightTriangle" => "\u22EB", + "NotRightTriangleEqual" => "\u22ED", + "NotSquareSubset" => "\u228F\u0338", + "NotSquareSubsetEqual" => "\u22E2", + "NotSquareSuperset" => "\u2290\u0338", + "NotSquareSupersetEqual" => "\u22E3", + "NotSubset" => "\u2282\u20D2", + "NotSubsetEqual" => "\u2288", + "NotSucceeds" => "\u2281", + "NotSucceedsEqual" => "\u2AB0\u0338", + "NotSucceedsSlantEqual" => "\u22E1", + "NotSucceedsTilde" => "\u227F\u0338", + "NotSuperset" => "\u2283\u20D2", + "NotSupersetEqual" => "\u2289", + "NotTilde" => "\u2241", + "NotTildeEqual" => "\u2244", + "NotTildeFullEqual" => "\u2247", + "NotTildeTilde" => "\u2249", + "NotVerticalBar" => "\u2224", + "nparallel" => "\u2226", + "npar" => "\u2226", + "nparsl" => "\u2AFD\u20E5", + "npart" => "\u2202\u0338", + "npolint" => "\u2A14", + "npr" => "\u2280", + "nprcue" => "\u22E0", + 
"nprec" => "\u2280", + "npreceq" => "\u2AAF\u0338", + "npre" => "\u2AAF\u0338", + "nrarrc" => "\u2933\u0338", + "nrarr" => "\u219B", + "nrArr" => "\u21CF", + "nrarrw" => "\u219D\u0338", + "nrightarrow" => "\u219B", + "nRightarrow" => "\u21CF", + "nrtri" => "\u22EB", + "nrtrie" => "\u22ED", + "nsc" => "\u2281", + "nsccue" => "\u22E1", + "nsce" => "\u2AB0\u0338", + "Nscr" => "\xED\xA0\xB5\xED\xB2\xA9", + "nscr" => "\xED\xA0\xB5\xED\xB3\x83", + "nshortmid" => "\u2224", + "nshortparallel" => "\u2226", + "nsim" => "\u2241", + "nsime" => "\u2244", + "nsimeq" => "\u2244", + "nsmid" => "\u2224", + "nspar" => "\u2226", + "nsqsube" => "\u22E2", + "nsqsupe" => "\u22E3", + "nsub" => "\u2284", + "nsubE" => "\u2AC5\u0338", + "nsube" => "\u2288", + "nsubset" => "\u2282\u20D2", + "nsubseteq" => "\u2288", + "nsubseteqq" => "\u2AC5\u0338", + "nsucc" => "\u2281", + "nsucceq" => "\u2AB0\u0338", + "nsup" => "\u2285", + "nsupE" => "\u2AC6\u0338", + "nsupe" => "\u2289", + "nsupset" => "\u2283\u20D2", + "nsupseteq" => "\u2289", + "nsupseteqq" => "\u2AC6\u0338", + "ntgl" => "\u2279", + "Ntilde" => "\u00D1", + "ntilde" => "\u00F1", + "ntlg" => "\u2278", + "ntriangleleft" => "\u22EA", + "ntrianglelefteq" => "\u22EC", + "ntriangleright" => "\u22EB", + "ntrianglerighteq" => "\u22ED", + "Nu" => "\u039D", + "nu" => "\u03BD", + "num" => "#", + "numero" => "\u2116", + "numsp" => "\u2007", + "nvap" => "\u224D\u20D2", + "nvdash" => "\u22AC", + "nvDash" => "\u22AD", + "nVdash" => "\u22AE", + "nVDash" => "\u22AF", + "nvge" => "\u2265\u20D2", + "nvgt" => ">\u20D2", + "nvHarr" => "\u2904", + "nvinfin" => "\u29DE", + "nvlArr" => "\u2902", + "nvle" => "\u2264\u20D2", + "nvlt" => "<\u20D2", + "nvltrie" => "\u22B4\u20D2", + "nvrArr" => "\u2903", + "nvrtrie" => "\u22B5\u20D2", + "nvsim" => "\u223C\u20D2", + "nwarhk" => "\u2923", + "nwarr" => "\u2196", + "nwArr" => "\u21D6", + "nwarrow" => "\u2196", + "nwnear" => "\u2927", + "Oacute" => "\u00D3", + "oacute" => "\u00F3", + "oast" => "\u229B", + "Ocirc" => 
"\u00D4", + "ocirc" => "\u00F4", + "ocir" => "\u229A", + "Ocy" => "\u041E", + "ocy" => "\u043E", + "odash" => "\u229D", + "Odblac" => "\u0150", + "odblac" => "\u0151", + "odiv" => "\u2A38", + "odot" => "\u2299", + "odsold" => "\u29BC", + "OElig" => "\u0152", + "oelig" => "\u0153", + "ofcir" => "\u29BF", + "Ofr" => "\xED\xA0\xB5\xED\xB4\x92", + "ofr" => "\xED\xA0\xB5\xED\xB4\xAC", + "ogon" => "\u02DB", + "Ograve" => "\u00D2", + "ograve" => "\u00F2", + "ogt" => "\u29C1", + "ohbar" => "\u29B5", + "ohm" => "\u03A9", + "oint" => "\u222E", + "olarr" => "\u21BA", + "olcir" => "\u29BE", + "olcross" => "\u29BB", + "oline" => "\u203E", + "olt" => "\u29C0", + "Omacr" => "\u014C", + "omacr" => "\u014D", + "Omega" => "\u03A9", + "omega" => "\u03C9", + "Omicron" => "\u039F", + "omicron" => "\u03BF", + "omid" => "\u29B6", + "ominus" => "\u2296", + "Oopf" => "\xED\xA0\xB5\xED\xB5\x86", + "oopf" => "\xED\xA0\xB5\xED\xB5\xA0", + "opar" => "\u29B7", + "OpenCurlyDoubleQuote" => "\u201C", + "OpenCurlyQuote" => "\u2018", + "operp" => "\u29B9", + "oplus" => "\u2295", + "orarr" => "\u21BB", + "Or" => "\u2A54", + "or" => "\u2228", + "ord" => "\u2A5D", + "order" => "\u2134", + "orderof" => "\u2134", + "ordf" => "\u00AA", + "ordm" => "\u00BA", + "origof" => "\u22B6", + "oror" => "\u2A56", + "orslope" => "\u2A57", + "orv" => "\u2A5B", + "oS" => "\u24C8", + "Oscr" => "\xED\xA0\xB5\xED\xB2\xAA", + "oscr" => "\u2134", + "Oslash" => "\u00D8", + "oslash" => "\u00F8", + "osol" => "\u2298", + "Otilde" => "\u00D5", + "otilde" => "\u00F5", + "otimesas" => "\u2A36", + "Otimes" => "\u2A37", + "otimes" => "\u2297", + "Ouml" => "\u00D6", + "ouml" => "\u00F6", + "ovbar" => "\u233D", + "OverBar" => "\u203E", + "OverBrace" => "\u23DE", + "OverBracket" => "\u23B4", + "OverParenthesis" => "\u23DC", + "para" => "\u00B6", + "parallel" => "\u2225", + "par" => "\u2225", + "parsim" => "\u2AF3", + "parsl" => "\u2AFD", + "part" => "\u2202", + "PartialD" => "\u2202", + "Pcy" => "\u041F", + "pcy" => "\u043F", + 
"percnt" => "%", + "period" => ".", + "permil" => "\u2030", + "perp" => "\u22A5", + "pertenk" => "\u2031", + "Pfr" => "\xED\xA0\xB5\xED\xB4\x93", + "pfr" => "\xED\xA0\xB5\xED\xB4\xAD", + "Phi" => "\u03A6", + "phi" => "\u03C6", + "phiv" => "\u03D5", + "phmmat" => "\u2133", + "phone" => "\u260E", + "Pi" => "\u03A0", + "pi" => "\u03C0", + "pitchfork" => "\u22D4", + "piv" => "\u03D6", + "planck" => "\u210F", + "planckh" => "\u210E", + "plankv" => "\u210F", + "plusacir" => "\u2A23", + "plusb" => "\u229E", + "pluscir" => "\u2A22", + "plus" => "+", + "plusdo" => "\u2214", + "plusdu" => "\u2A25", + "pluse" => "\u2A72", + "PlusMinus" => "\u00B1", + "plusmn" => "\u00B1", + "plussim" => "\u2A26", + "plustwo" => "\u2A27", + "pm" => "\u00B1", + "Poincareplane" => "\u210C", + "pointint" => "\u2A15", + "popf" => "\xED\xA0\xB5\xED\xB5\xA1", + "Popf" => "\u2119", + "pound" => "\u00A3", + "prap" => "\u2AB7", + "Pr" => "\u2ABB", + "pr" => "\u227A", + "prcue" => "\u227C", + "precapprox" => "\u2AB7", + "prec" => "\u227A", + "preccurlyeq" => "\u227C", + "Precedes" => "\u227A", + "PrecedesEqual" => "\u2AAF", + "PrecedesSlantEqual" => "\u227C", + "PrecedesTilde" => "\u227E", + "preceq" => "\u2AAF", + "precnapprox" => "\u2AB9", + "precneqq" => "\u2AB5", + "precnsim" => "\u22E8", + "pre" => "\u2AAF", + "prE" => "\u2AB3", + "precsim" => "\u227E", + "prime" => "\u2032", + "Prime" => "\u2033", + "primes" => "\u2119", + "prnap" => "\u2AB9", + "prnE" => "\u2AB5", + "prnsim" => "\u22E8", + "prod" => "\u220F", + "Product" => "\u220F", + "profalar" => "\u232E", + "profline" => "\u2312", + "profsurf" => "\u2313", + "prop" => "\u221D", + "Proportional" => "\u221D", + "Proportion" => "\u2237", + "propto" => "\u221D", + "prsim" => "\u227E", + "prurel" => "\u22B0", + "Pscr" => "\xED\xA0\xB5\xED\xB2\xAB", + "pscr" => "\xED\xA0\xB5\xED\xB3\x85", + "Psi" => "\u03A8", + "psi" => "\u03C8", + "puncsp" => "\u2008", + "Qfr" => "\xED\xA0\xB5\xED\xB4\x94", + "qfr" => "\xED\xA0\xB5\xED\xB4\xAE", + "qint" => 
"\u2A0C", + "qopf" => "\xED\xA0\xB5\xED\xB5\xA2", + "Qopf" => "\u211A", + "qprime" => "\u2057", + "Qscr" => "\xED\xA0\xB5\xED\xB2\xAC", + "qscr" => "\xED\xA0\xB5\xED\xB3\x86", + "quaternions" => "\u210D", + "quatint" => "\u2A16", + "quest" => "?", + "questeq" => "\u225F", + "quot" => "\"", + "QUOT" => "\"", + "rAarr" => "\u21DB", + "race" => "\u223D\u0331", + "Racute" => "\u0154", + "racute" => "\u0155", + "radic" => "\u221A", + "raemptyv" => "\u29B3", + "rang" => "\u27E9", + "Rang" => "\u27EB", + "rangd" => "\u2992", + "range" => "\u29A5", + "rangle" => "\u27E9", + "raquo" => "\u00BB", + "rarrap" => "\u2975", + "rarrb" => "\u21E5", + "rarrbfs" => "\u2920", + "rarrc" => "\u2933", + "rarr" => "\u2192", + "Rarr" => "\u21A0", + "rArr" => "\u21D2", + "rarrfs" => "\u291E", + "rarrhk" => "\u21AA", + "rarrlp" => "\u21AC", + "rarrpl" => "\u2945", + "rarrsim" => "\u2974", + "Rarrtl" => "\u2916", + "rarrtl" => "\u21A3", + "rarrw" => "\u219D", + "ratail" => "\u291A", + "rAtail" => "\u291C", + "ratio" => "\u2236", + "rationals" => "\u211A", + "rbarr" => "\u290D", + "rBarr" => "\u290F", + "RBarr" => "\u2910", + "rbbrk" => "\u2773", + "rbrace" => "}", + "rbrack" => "]", + "rbrke" => "\u298C", + "rbrksld" => "\u298E", + "rbrkslu" => "\u2990", + "Rcaron" => "\u0158", + "rcaron" => "\u0159", + "Rcedil" => "\u0156", + "rcedil" => "\u0157", + "rceil" => "\u2309", + "rcub" => "}", + "Rcy" => "\u0420", + "rcy" => "\u0440", + "rdca" => "\u2937", + "rdldhar" => "\u2969", + "rdquo" => "\u201D", + "rdquor" => "\u201D", + "rdsh" => "\u21B3", + "real" => "\u211C", + "realine" => "\u211B", + "realpart" => "\u211C", + "reals" => "\u211D", + "Re" => "\u211C", + "rect" => "\u25AD", + "reg" => "\u00AE", + "REG" => "\u00AE", + "ReverseElement" => "\u220B", + "ReverseEquilibrium" => "\u21CB", + "ReverseUpEquilibrium" => "\u296F", + "rfisht" => "\u297D", + "rfloor" => "\u230B", + "rfr" => "\xED\xA0\xB5\xED\xB4\xAF", + "Rfr" => "\u211C", + "rHar" => "\u2964", + "rhard" => "\u21C1", + "rharu" => 
"\u21C0", + "rharul" => "\u296C", + "Rho" => "\u03A1", + "rho" => "\u03C1", + "rhov" => "\u03F1", + "RightAngleBracket" => "\u27E9", + "RightArrowBar" => "\u21E5", + "rightarrow" => "\u2192", + "RightArrow" => "\u2192", + "Rightarrow" => "\u21D2", + "RightArrowLeftArrow" => "\u21C4", + "rightarrowtail" => "\u21A3", + "RightCeiling" => "\u2309", + "RightDoubleBracket" => "\u27E7", + "RightDownTeeVector" => "\u295D", + "RightDownVectorBar" => "\u2955", + "RightDownVector" => "\u21C2", + "RightFloor" => "\u230B", + "rightharpoondown" => "\u21C1", + "rightharpoonup" => "\u21C0", + "rightleftarrows" => "\u21C4", + "rightleftharpoons" => "\u21CC", + "rightrightarrows" => "\u21C9", + "rightsquigarrow" => "\u219D", + "RightTeeArrow" => "\u21A6", + "RightTee" => "\u22A2", + "RightTeeVector" => "\u295B", + "rightthreetimes" => "\u22CC", + "RightTriangleBar" => "\u29D0", + "RightTriangle" => "\u22B3", + "RightTriangleEqual" => "\u22B5", + "RightUpDownVector" => "\u294F", + "RightUpTeeVector" => "\u295C", + "RightUpVectorBar" => "\u2954", + "RightUpVector" => "\u21BE", + "RightVectorBar" => "\u2953", + "RightVector" => "\u21C0", + "ring" => "\u02DA", + "risingdotseq" => "\u2253", + "rlarr" => "\u21C4", + "rlhar" => "\u21CC", + "rlm" => "\u200F", + "rmoustache" => "\u23B1", + "rmoust" => "\u23B1", + "rnmid" => "\u2AEE", + "roang" => "\u27ED", + "roarr" => "\u21FE", + "robrk" => "\u27E7", + "ropar" => "\u2986", + "ropf" => "\xED\xA0\xB5\xED\xB5\xA3", + "Ropf" => "\u211D", + "roplus" => "\u2A2E", + "rotimes" => "\u2A35", + "RoundImplies" => "\u2970", + "rpar" => ")", + "rpargt" => "\u2994", + "rppolint" => "\u2A12", + "rrarr" => "\u21C9", + "Rrightarrow" => "\u21DB", + "rsaquo" => "\u203A", + "rscr" => "\xED\xA0\xB5\xED\xB3\x87", + "Rscr" => "\u211B", + "rsh" => "\u21B1", + "Rsh" => "\u21B1", + "rsqb" => "]", + "rsquo" => "\u2019", + "rsquor" => "\u2019", + "rthree" => "\u22CC", + "rtimes" => "\u22CA", + "rtri" => "\u25B9", + "rtrie" => "\u22B5", + "rtrif" => "\u25B8", + 
"rtriltri" => "\u29CE", + "RuleDelayed" => "\u29F4", + "ruluhar" => "\u2968", + "rx" => "\u211E", + "Sacute" => "\u015A", + "sacute" => "\u015B", + "sbquo" => "\u201A", + "scap" => "\u2AB8", + "Scaron" => "\u0160", + "scaron" => "\u0161", + "Sc" => "\u2ABC", + "sc" => "\u227B", + "sccue" => "\u227D", + "sce" => "\u2AB0", + "scE" => "\u2AB4", + "Scedil" => "\u015E", + "scedil" => "\u015F", + "Scirc" => "\u015C", + "scirc" => "\u015D", + "scnap" => "\u2ABA", + "scnE" => "\u2AB6", + "scnsim" => "\u22E9", + "scpolint" => "\u2A13", + "scsim" => "\u227F", + "Scy" => "\u0421", + "scy" => "\u0441", + "sdotb" => "\u22A1", + "sdot" => "\u22C5", + "sdote" => "\u2A66", + "searhk" => "\u2925", + "searr" => "\u2198", + "seArr" => "\u21D8", + "searrow" => "\u2198", + "sect" => "\u00A7", + "semi" => ";", + "seswar" => "\u2929", + "setminus" => "\u2216", + "setmn" => "\u2216", + "sext" => "\u2736", + "Sfr" => "\xED\xA0\xB5\xED\xB4\x96", + "sfr" => "\xED\xA0\xB5\xED\xB4\xB0", + "sfrown" => "\u2322", + "sharp" => "\u266F", + "SHCHcy" => "\u0429", + "shchcy" => "\u0449", + "SHcy" => "\u0428", + "shcy" => "\u0448", + "ShortDownArrow" => "\u2193", + "ShortLeftArrow" => "\u2190", + "shortmid" => "\u2223", + "shortparallel" => "\u2225", + "ShortRightArrow" => "\u2192", + "ShortUpArrow" => "\u2191", + "shy" => "\u00AD", + "Sigma" => "\u03A3", + "sigma" => "\u03C3", + "sigmaf" => "\u03C2", + "sigmav" => "\u03C2", + "sim" => "\u223C", + "simdot" => "\u2A6A", + "sime" => "\u2243", + "simeq" => "\u2243", + "simg" => "\u2A9E", + "simgE" => "\u2AA0", + "siml" => "\u2A9D", + "simlE" => "\u2A9F", + "simne" => "\u2246", + "simplus" => "\u2A24", + "simrarr" => "\u2972", + "slarr" => "\u2190", + "SmallCircle" => "\u2218", + "smallsetminus" => "\u2216", + "smashp" => "\u2A33", + "smeparsl" => "\u29E4", + "smid" => "\u2223", + "smile" => "\u2323", + "smt" => "\u2AAA", + "smte" => "\u2AAC", + "smtes" => "\u2AAC\uFE00", + "SOFTcy" => "\u042C", + "softcy" => "\u044C", + "solbar" => "\u233F", + "solb" => 
"\u29C4", + "sol" => "/", + "Sopf" => "\xED\xA0\xB5\xED\xB5\x8A", + "sopf" => "\xED\xA0\xB5\xED\xB5\xA4", + "spades" => "\u2660", + "spadesuit" => "\u2660", + "spar" => "\u2225", + "sqcap" => "\u2293", + "sqcaps" => "\u2293\uFE00", + "sqcup" => "\u2294", + "sqcups" => "\u2294\uFE00", + "Sqrt" => "\u221A", + "sqsub" => "\u228F", + "sqsube" => "\u2291", + "sqsubset" => "\u228F", + "sqsubseteq" => "\u2291", + "sqsup" => "\u2290", + "sqsupe" => "\u2292", + "sqsupset" => "\u2290", + "sqsupseteq" => "\u2292", + "square" => "\u25A1", + "Square" => "\u25A1", + "SquareIntersection" => "\u2293", + "SquareSubset" => "\u228F", + "SquareSubsetEqual" => "\u2291", + "SquareSuperset" => "\u2290", + "SquareSupersetEqual" => "\u2292", + "SquareUnion" => "\u2294", + "squarf" => "\u25AA", + "squ" => "\u25A1", + "squf" => "\u25AA", + "srarr" => "\u2192", + "Sscr" => "\xED\xA0\xB5\xED\xB2\xAE", + "sscr" => "\xED\xA0\xB5\xED\xB3\x88", + "ssetmn" => "\u2216", + "ssmile" => "\u2323", + "sstarf" => "\u22C6", + "Star" => "\u22C6", + "star" => "\u2606", + "starf" => "\u2605", + "straightepsilon" => "\u03F5", + "straightphi" => "\u03D5", + "strns" => "\u00AF", + "sub" => "\u2282", + "Sub" => "\u22D0", + "subdot" => "\u2ABD", + "subE" => "\u2AC5", + "sube" => "\u2286", + "subedot" => "\u2AC3", + "submult" => "\u2AC1", + "subnE" => "\u2ACB", + "subne" => "\u228A", + "subplus" => "\u2ABF", + "subrarr" => "\u2979", + "subset" => "\u2282", + "Subset" => "\u22D0", + "subseteq" => "\u2286", + "subseteqq" => "\u2AC5", + "SubsetEqual" => "\u2286", + "subsetneq" => "\u228A", + "subsetneqq" => "\u2ACB", + "subsim" => "\u2AC7", + "subsub" => "\u2AD5", + "subsup" => "\u2AD3", + "succapprox" => "\u2AB8", + "succ" => "\u227B", + "succcurlyeq" => "\u227D", + "Succeeds" => "\u227B", + "SucceedsEqual" => "\u2AB0", + "SucceedsSlantEqual" => "\u227D", + "SucceedsTilde" => "\u227F", + "succeq" => "\u2AB0", + "succnapprox" => "\u2ABA", + "succneqq" => "\u2AB6", + "succnsim" => "\u22E9", + "succsim" => "\u227F", + 
"SuchThat" => "\u220B", + "sum" => "\u2211", + "Sum" => "\u2211", + "sung" => "\u266A", + "sup1" => "\u00B9", + "sup2" => "\u00B2", + "sup3" => "\u00B3", + "sup" => "\u2283", + "Sup" => "\u22D1", + "supdot" => "\u2ABE", + "supdsub" => "\u2AD8", + "supE" => "\u2AC6", + "supe" => "\u2287", + "supedot" => "\u2AC4", + "Superset" => "\u2283", + "SupersetEqual" => "\u2287", + "suphsol" => "\u27C9", + "suphsub" => "\u2AD7", + "suplarr" => "\u297B", + "supmult" => "\u2AC2", + "supnE" => "\u2ACC", + "supne" => "\u228B", + "supplus" => "\u2AC0", + "supset" => "\u2283", + "Supset" => "\u22D1", + "supseteq" => "\u2287", + "supseteqq" => "\u2AC6", + "supsetneq" => "\u228B", + "supsetneqq" => "\u2ACC", + "supsim" => "\u2AC8", + "supsub" => "\u2AD4", + "supsup" => "\u2AD6", + "swarhk" => "\u2926", + "swarr" => "\u2199", + "swArr" => "\u21D9", + "swarrow" => "\u2199", + "swnwar" => "\u292A", + "szlig" => "\u00DF", + "Tab" => "\t", + "target" => "\u2316", + "Tau" => "\u03A4", + "tau" => "\u03C4", + "tbrk" => "\u23B4", + "Tcaron" => "\u0164", + "tcaron" => "\u0165", + "Tcedil" => "\u0162", + "tcedil" => "\u0163", + "Tcy" => "\u0422", + "tcy" => "\u0442", + "tdot" => "\u20DB", + "telrec" => "\u2315", + "Tfr" => "\xED\xA0\xB5\xED\xB4\x97", + "tfr" => "\xED\xA0\xB5\xED\xB4\xB1", + "there4" => "\u2234", + "therefore" => "\u2234", + "Therefore" => "\u2234", + "Theta" => "\u0398", + "theta" => "\u03B8", + "thetasym" => "\u03D1", + "thetav" => "\u03D1", + "thickapprox" => "\u2248", + "thicksim" => "\u223C", + "ThickSpace" => "\u205F\u200A", + "ThinSpace" => "\u2009", + "thinsp" => "\u2009", + "thkap" => "\u2248", + "thksim" => "\u223C", + "THORN" => "\u00DE", + "thorn" => "\u00FE", + "tilde" => "\u02DC", + "Tilde" => "\u223C", + "TildeEqual" => "\u2243", + "TildeFullEqual" => "\u2245", + "TildeTilde" => "\u2248", + "timesbar" => "\u2A31", + "timesb" => "\u22A0", + "times" => "\u00D7", + "timesd" => "\u2A30", + "tint" => "\u222D", + "toea" => "\u2928", + "topbot" => "\u2336", + "topcir" => 
"\u2AF1", + "top" => "\u22A4", + "Topf" => "\xED\xA0\xB5\xED\xB5\x8B", + "topf" => "\xED\xA0\xB5\xED\xB5\xA5", + "topfork" => "\u2ADA", + "tosa" => "\u2929", + "tprime" => "\u2034", + "trade" => "\u2122", + "TRADE" => "\u2122", + "triangle" => "\u25B5", + "triangledown" => "\u25BF", + "triangleleft" => "\u25C3", + "trianglelefteq" => "\u22B4", + "triangleq" => "\u225C", + "triangleright" => "\u25B9", + "trianglerighteq" => "\u22B5", + "tridot" => "\u25EC", + "trie" => "\u225C", + "triminus" => "\u2A3A", + "TripleDot" => "\u20DB", + "triplus" => "\u2A39", + "trisb" => "\u29CD", + "tritime" => "\u2A3B", + "trpezium" => "\u23E2", + "Tscr" => "\xED\xA0\xB5\xED\xB2\xAF", + "tscr" => "\xED\xA0\xB5\xED\xB3\x89", + "TScy" => "\u0426", + "tscy" => "\u0446", + "TSHcy" => "\u040B", + "tshcy" => "\u045B", + "Tstrok" => "\u0166", + "tstrok" => "\u0167", + "twixt" => "\u226C", + "twoheadleftarrow" => "\u219E", + "twoheadrightarrow" => "\u21A0", + "Uacute" => "\u00DA", + "uacute" => "\u00FA", + "uarr" => "\u2191", + "Uarr" => "\u219F", + "uArr" => "\u21D1", + "Uarrocir" => "\u2949", + "Ubrcy" => "\u040E", + "ubrcy" => "\u045E", + "Ubreve" => "\u016C", + "ubreve" => "\u016D", + "Ucirc" => "\u00DB", + "ucirc" => "\u00FB", + "Ucy" => "\u0423", + "ucy" => "\u0443", + "udarr" => "\u21C5", + "Udblac" => "\u0170", + "udblac" => "\u0171", + "udhar" => "\u296E", + "ufisht" => "\u297E", + "Ufr" => "\xED\xA0\xB5\xED\xB4\x98", + "ufr" => "\xED\xA0\xB5\xED\xB4\xB2", + "Ugrave" => "\u00D9", + "ugrave" => "\u00F9", + "uHar" => "\u2963", + "uharl" => "\u21BF", + "uharr" => "\u21BE", + "uhblk" => "\u2580", + "ulcorn" => "\u231C", + "ulcorner" => "\u231C", + "ulcrop" => "\u230F", + "ultri" => "\u25F8", + "Umacr" => "\u016A", + "umacr" => "\u016B", + "uml" => "\u00A8", + "UnderBar" => "_", + "UnderBrace" => "\u23DF", + "UnderBracket" => "\u23B5", + "UnderParenthesis" => "\u23DD", + "Union" => "\u22C3", + "UnionPlus" => "\u228E", + "Uogon" => "\u0172", + "uogon" => "\u0173", + "Uopf" => 
"\xED\xA0\xB5\xED\xB5\x8C", + "uopf" => "\xED\xA0\xB5\xED\xB5\xA6", + "UpArrowBar" => "\u2912", + "uparrow" => "\u2191", + "UpArrow" => "\u2191", + "Uparrow" => "\u21D1", + "UpArrowDownArrow" => "\u21C5", + "updownarrow" => "\u2195", + "UpDownArrow" => "\u2195", + "Updownarrow" => "\u21D5", + "UpEquilibrium" => "\u296E", + "upharpoonleft" => "\u21BF", + "upharpoonright" => "\u21BE", + "uplus" => "\u228E", + "UpperLeftArrow" => "\u2196", + "UpperRightArrow" => "\u2197", + "upsi" => "\u03C5", + "Upsi" => "\u03D2", + "upsih" => "\u03D2", + "Upsilon" => "\u03A5", + "upsilon" => "\u03C5", + "UpTeeArrow" => "\u21A5", + "UpTee" => "\u22A5", + "upuparrows" => "\u21C8", + "urcorn" => "\u231D", + "urcorner" => "\u231D", + "urcrop" => "\u230E", + "Uring" => "\u016E", + "uring" => "\u016F", + "urtri" => "\u25F9", + "Uscr" => "\xED\xA0\xB5\xED\xB2\xB0", + "uscr" => "\xED\xA0\xB5\xED\xB3\x8A", + "utdot" => "\u22F0", + "Utilde" => "\u0168", + "utilde" => "\u0169", + "utri" => "\u25B5", + "utrif" => "\u25B4", + "uuarr" => "\u21C8", + "Uuml" => "\u00DC", + "uuml" => "\u00FC", + "uwangle" => "\u29A7", + "vangrt" => "\u299C", + "varepsilon" => "\u03F5", + "varkappa" => "\u03F0", + "varnothing" => "\u2205", + "varphi" => "\u03D5", + "varpi" => "\u03D6", + "varpropto" => "\u221D", + "varr" => "\u2195", + "vArr" => "\u21D5", + "varrho" => "\u03F1", + "varsigma" => "\u03C2", + "varsubsetneq" => "\u228A\uFE00", + "varsubsetneqq" => "\u2ACB\uFE00", + "varsupsetneq" => "\u228B\uFE00", + "varsupsetneqq" => "\u2ACC\uFE00", + "vartheta" => "\u03D1", + "vartriangleleft" => "\u22B2", + "vartriangleright" => "\u22B3", + "vBar" => "\u2AE8", + "Vbar" => "\u2AEB", + "vBarv" => "\u2AE9", + "Vcy" => "\u0412", + "vcy" => "\u0432", + "vdash" => "\u22A2", + "vDash" => "\u22A8", + "Vdash" => "\u22A9", + "VDash" => "\u22AB", + "Vdashl" => "\u2AE6", + "veebar" => "\u22BB", + "vee" => "\u2228", + "Vee" => "\u22C1", + "veeeq" => "\u225A", + "vellip" => "\u22EE", + "verbar" => "|", + "Verbar" => "\u2016", + 
"vert" => "|", + "Vert" => "\u2016", + "VerticalBar" => "\u2223", + "VerticalLine" => "|", + "VerticalSeparator" => "\u2758", + "VerticalTilde" => "\u2240", + "VeryThinSpace" => "\u200A", + "Vfr" => "\xED\xA0\xB5\xED\xB4\x99", + "vfr" => "\xED\xA0\xB5\xED\xB4\xB3", + "vltri" => "\u22B2", + "vnsub" => "\u2282\u20D2", + "vnsup" => "\u2283\u20D2", + "Vopf" => "\xED\xA0\xB5\xED\xB5\x8D", + "vopf" => "\xED\xA0\xB5\xED\xB5\xA7", + "vprop" => "\u221D", + "vrtri" => "\u22B3", + "Vscr" => "\xED\xA0\xB5\xED\xB2\xB1", + "vscr" => "\xED\xA0\xB5\xED\xB3\x8B", + "vsubnE" => "\u2ACB\uFE00", + "vsubne" => "\u228A\uFE00", + "vsupnE" => "\u2ACC\uFE00", + "vsupne" => "\u228B\uFE00", + "Vvdash" => "\u22AA", + "vzigzag" => "\u299A", + "Wcirc" => "\u0174", + "wcirc" => "\u0175", + "wedbar" => "\u2A5F", + "wedge" => "\u2227", + "Wedge" => "\u22C0", + "wedgeq" => "\u2259", + "weierp" => "\u2118", + "Wfr" => "\xED\xA0\xB5\xED\xB4\x9A", + "wfr" => "\xED\xA0\xB5\xED\xB4\xB4", + "Wopf" => "\xED\xA0\xB5\xED\xB5\x8E", + "wopf" => "\xED\xA0\xB5\xED\xB5\xA8", + "wp" => "\u2118", + "wr" => "\u2240", + "wreath" => "\u2240", + "Wscr" => "\xED\xA0\xB5\xED\xB2\xB2", + "wscr" => "\xED\xA0\xB5\xED\xB3\x8C", + "xcap" => "\u22C2", + "xcirc" => "\u25EF", + "xcup" => "\u22C3", + "xdtri" => "\u25BD", + "Xfr" => "\xED\xA0\xB5\xED\xB4\x9B", + "xfr" => "\xED\xA0\xB5\xED\xB4\xB5", + "xharr" => "\u27F7", + "xhArr" => "\u27FA", + "Xi" => "\u039E", + "xi" => "\u03BE", + "xlarr" => "\u27F5", + "xlArr" => "\u27F8", + "xmap" => "\u27FC", + "xnis" => "\u22FB", + "xodot" => "\u2A00", + "Xopf" => "\xED\xA0\xB5\xED\xB5\x8F", + "xopf" => "\xED\xA0\xB5\xED\xB5\xA9", + "xoplus" => "\u2A01", + "xotime" => "\u2A02", + "xrarr" => "\u27F6", + "xrArr" => "\u27F9", + "Xscr" => "\xED\xA0\xB5\xED\xB2\xB3", + "xscr" => "\xED\xA0\xB5\xED\xB3\x8D", + "xsqcup" => "\u2A06", + "xuplus" => "\u2A04", + "xutri" => "\u25B3", + "xvee" => "\u22C1", + "xwedge" => "\u22C0", + "Yacute" => "\u00DD", + "yacute" => "\u00FD", + "YAcy" => "\u042F", + 
"yacy" => "\u044F", + "Ycirc" => "\u0176", + "ycirc" => "\u0177", + "Ycy" => "\u042B", + "ycy" => "\u044B", + "yen" => "\u00A5", + "Yfr" => "\xED\xA0\xB5\xED\xB4\x9C", + "yfr" => "\xED\xA0\xB5\xED\xB4\xB6", + "YIcy" => "\u0407", + "yicy" => "\u0457", + "Yopf" => "\xED\xA0\xB5\xED\xB5\x90", + "yopf" => "\xED\xA0\xB5\xED\xB5\xAA", + "Yscr" => "\xED\xA0\xB5\xED\xB2\xB4", + "yscr" => "\xED\xA0\xB5\xED\xB3\x8E", + "YUcy" => "\u042E", + "yucy" => "\u044E", + "yuml" => "\u00FF", + "Yuml" => "\u0178", + "Zacute" => "\u0179", + "zacute" => "\u017A", + "Zcaron" => "\u017D", + "zcaron" => "\u017E", + "Zcy" => "\u0417", + "zcy" => "\u0437", + "Zdot" => "\u017B", + "zdot" => "\u017C", + "zeetrf" => "\u2128", + "ZeroWidthSpace" => "\u200B", + "Zeta" => "\u0396", + "zeta" => "\u03B6", + "zfr" => "\xED\xA0\xB5\xED\xB4\xB7", + "Zfr" => "\u2128", + "ZHcy" => "\u0416", + "zhcy" => "\u0436", + "zigrarr" => "\u21DD", + "zopf" => "\xED\xA0\xB5\xED\xB5\xAB", + "Zopf" => "\u2124", + "Zscr" => "\xED\xA0\xB5\xED\xB2\xB5", + "zscr" => "\xED\xA0\xB5\xED\xB3\x8F", + "zwj" => "\u200D", + "zwnj" => "\u200C", + } +end diff --git a/lib/markd/src/markd/mappings/legacy.cr b/lib/markd/src/markd/mappings/legacy.cr new file mode 100644 index 000000000000..2aaa50ca6eec --- /dev/null +++ b/lib/markd/src/markd/mappings/legacy.cr @@ -0,0 +1,110 @@ +module Markd::HTMLEntities + LEGACY_MAPPINGS = { + "Aacute" => '\u00C1', + "aacute" => '\u00E1', + "Acirc" => '\u00C2', + "acirc" => '\u00E2', + "acute" => '\u00B4', + "AElig" => '\u00C6', + "aelig" => '\u00E6', + "Agrave" => '\u00C0', + "agrave" => '\u00E0', + "amp" => '&', + "AMP" => '&', + "Aring" => '\u00C5', + "aring" => '\u00E5', + "Atilde" => '\u00C3', + "atilde" => '\u00E3', + "Auml" => '\u00C4', + "auml" => '\u00E4', + "brvbar" => '\u00A6', + "Ccedil" => '\u00C7', + "ccedil" => '\u00E7', + "cedil" => '\u00B8', + "cent" => '\u00A2', + "copy" => '\u00A9', + "COPY" => '\u00A9', + "curren" => '\u00A4', + "deg" => '\u00B0', + "divide" => '\u00F7', + "Eacute" 
=> '\u00C9', + "eacute" => '\u00E9', + "Ecirc" => '\u00CA', + "ecirc" => '\u00EA', + "Egrave" => '\u00C8', + "egrave" => '\u00E8', + "ETH" => '\u00D0', + "eth" => '\u00F0', + "Euml" => '\u00CB', + "euml" => '\u00EB', + "frac12" => '\u00BD', + "frac14" => '\u00BC', + "frac34" => '\u00BE', + "gt" => '>', + "GT" => '>', + "Iacute" => '\u00CD', + "iacute" => '\u00ED', + "Icirc" => '\u00CE', + "icirc" => '\u00EE', + "iexcl" => '\u00A1', + "Igrave" => '\u00CC', + "igrave" => '\u00EC', + "iquest" => '\u00BF', + "Iuml" => '\u00CF', + "iuml" => '\u00EF', + "laquo" => '\u00AB', + "lt" => '<', + "LT" => '<', + "macr" => '\u00AF', + "micro" => '\u00B5', + "middot" => '\u00B7', + "nbsp" => '\u00A0', + "not" => '\u00AC', + "Ntilde" => '\u00D1', + "ntilde" => '\u00F1', + "Oacute" => '\u00D3', + "oacute" => '\u00F3', + "Ocirc" => '\u00D4', + "ocirc" => '\u00F4', + "Ograve" => '\u00D2', + "ograve" => '\u00F2', + "ordf" => '\u00AA', + "ordm" => '\u00BA', + "Oslash" => '\u00D8', + "oslash" => '\u00F8', + "Otilde" => '\u00D5', + "otilde" => '\u00F5', + "Ouml" => '\u00D6', + "ouml" => '\u00F6', + "para" => '\u00B6', + "plusmn" => '\u00B1', + "pound" => '\u00A3', + "quot" => "\"", + "QUOT" => "\"", + "raquo" => '\u00BB', + "reg" => '\u00AE', + "REG" => '\u00AE', + "sect" => '\u00A7', + "shy" => '\u00AD', + "sup1" => '\u00B9', + "sup2" => '\u00B2', + "sup3" => '\u00B3', + "szlig" => '\u00DF', + "THORN" => '\u00DE', + "thorn" => '\u00FE', + "times" => '\u00D7', + "Uacute" => '\u00DA', + "uacute" => '\u00FA', + "Ucirc" => '\u00DB', + "ucirc" => '\u00FB', + "Ugrave" => '\u00D9', + "ugrave" => '\u00F9', + "uml" => '\u00A8', + "Uuml" => '\u00DC', + "uuml" => '\u00FC', + "Yacute" => '\u00DD', + "yacute" => '\u00FD', + "yen" => '\u00A5', + "yuml" => '\u00FF', + } +end diff --git a/lib/markd/src/markd/node.cr b/lib/markd/src/markd/node.cr new file mode 100644 index 000000000000..06a68badf33a --- /dev/null +++ b/lib/markd/src/markd/node.cr @@ -0,0 +1,175 @@ +module Markd + class Node + # Node 
Type + enum Type + Document + Paragraph + Text + Strong + Emphasis + Link + Image + Heading + List + Item + BlockQuote + ThematicBreak + Code + CodeBlock + HTMLBlock + HTMLInline + LineBreak + SoftBreak + + CustomInLine + CustomBlock + + def container? + CONTAINER_TYPES.includes?(self) + end + end + + CONTAINER_TYPES = { + Type::Document, + Type::Paragraph, + Type::Strong, + Type::Emphasis, + Type::Link, + Type::Image, + Type::Heading, + Type::List, + Type::Item, + Type::BlockQuote, + Type::CustomInLine, + Type::CustomBlock, + } + + alias DataValue = String | Int32 | Bool + alias DataType = Hash(String, DataValue) + + property type : Type + + property(data) { {} of String => DataValue } + property source_pos = { {1, 1}, {0, 0} } + property text = "" + property? open = true + property? fenced = false + property fence_language = "" + property fence_char = "" + property fence_length = 0 + property fence_offset = 0 + property? last_line_blank = false + + property! parent : Node? + property! first_child : Node? + property! last_child : Node? + property! prev : Node? + property! next : Node? + + def initialize(@type) + end + + def append_child(child : Node) + child.unlink + child.parent = self + + if last = last_child? + last.next = child + child.prev = last + @last_child = child + else + @first_child = child + @last_child = child + end + end + + def insert_after(sibling : Node) + sibling.unlink + + if nxt = next? + nxt.prev = sibling + elsif parent = parent? + parent.last_child = sibling + end + sibling.next = nxt + + sibling.prev = self + @next = sibling + sibling.parent = parent? + end + + def unlink + if prev = prev? + prev.next = next? + elsif parent = parent? + parent.first_child = next? + end + + if nxt = next? + nxt.prev = prev? + elsif parent = parent? + parent.last_child = prev? 
+ end + + @parent = nil + @next = nil + @prev = nil + end + + def walker + Walker.new(self) + end + + def to_s(io : IO) + io << "#<" << {{@type.name.id.stringify}} << ":0x" + object_id.to_s(16, io) + io << " @type=" << @type + io << " @parent=" << @parent if @parent + io << " @next=" << @next if @next + + data = @data + io << " @data=" << data if data && !data.empty? + + io << ">" + nil + end + + private class Walker + def initialize(@root : Node) + @current = @root + @entering = true + end + + def next + current = @current + return unless current + + entering = @entering + + if entering && current.type.container? + if first_child = current.first_child? + @current = first_child + @entering = true + else + @entering = false + end + elsif current == @root + @current = nil + elsif nxt = current.next? + @current = current.next? + @entering = true + else + @current = current.parent? + @entering = false + end + + return current, entering + end + + def resume_at(node : Node, entering : Bool) + @current = node + @entering = entering + end + end + end +end diff --git a/lib/markd/src/markd/options.cr b/lib/markd/src/markd/options.cr new file mode 100644 index 000000000000..8f57b910f074 --- /dev/null +++ b/lib/markd/src/markd/options.cr @@ -0,0 +1,24 @@ +require "uri" + +module Markd + struct Options + property time, gfm, toc, smart, source_pos, safe, prettyprint + + # If `base_url` is not `nil`, it is used to resolve URLs of relative + # links. It acts like HTML's `<base href="base_url">` in the context + # of a Markdown document. + property base_url : URI?
+ + def initialize( + @time = false, + @gfm = false, + @toc = false, + @smart = false, + @source_pos = false, + @safe = false, + @prettyprint = false, + @base_url = nil + ) + end + end +end diff --git a/lib/markd/src/markd/parser.cr b/lib/markd/src/markd/parser.cr new file mode 100644 index 000000000000..52da8e7564bb --- /dev/null +++ b/lib/markd/src/markd/parser.cr @@ -0,0 +1,9 @@ +module Markd + module Parser + def self.parse(source : String, options = Options.new) + Block.parse(source, options) + end + end +end + +require "./parsers/*" diff --git a/lib/markd/src/markd/parsers/block.cr b/lib/markd/src/markd/parsers/block.cr new file mode 100644 index 000000000000..94b53f0c577d --- /dev/null +++ b/lib/markd/src/markd/parsers/block.cr @@ -0,0 +1,337 @@ +module Markd::Parser + class Block + include Parser + + def self.parse(source : String, options = Options.new) + new(options).parse(source) + end + + RULES = { + Node::Type::Document => Rule::Document.new, + Node::Type::BlockQuote => Rule::BlockQuote.new, + Node::Type::Heading => Rule::Heading.new, + Node::Type::CodeBlock => Rule::CodeBlock.new, + Node::Type::HTMLBlock => Rule::HTMLBlock.new, + Node::Type::ThematicBreak => Rule::ThematicBreak.new, + Node::Type::List => Rule::List.new, + Node::Type::Item => Rule::Item.new, + Node::Type::Paragraph => Rule::Paragraph.new, + } + + property! tip : Node? 
+ property offset, column + + getter line, current_line, blank, inline_lexer, + indent, indented, next_nonspace, refmap + + def initialize(@options : Options) + @inline_lexer = Inline.new(@options) + + @document = Node.new(Node::Type::Document) + @tip = @document + @oldtip = @tip + @last_matched_container = @tip + + @line = "" + + @current_line = 0 + @offset = 0 + @column = 0 + @last_line_length = 0 + + @next_nonspace = 0 + @next_nonspace_column = 0 + + @indent = 0 + @indented = false + @blank = false + @partially_consumed_tab = false + @all_closed = true + @refmap = {} of String => Hash(String, String) | String + end + + def parse(source : String) + Utils.timer("block parsing", @options.time) do + parse_blocks(source) + end + + Utils.timer("inline parsing", @options.time) do + process_inlines + end + + @document + end + + private def parse_blocks(source) + lines_size = 0 + source.each_line do |line| + process_line(line) + lines_size += 1 + end + + # ignore last blank line created by final newline + lines_size -= 1 if source.ends_with?('\n') + + while tip = tip? + token(tip, lines_size) + end + end + + private def process_line(line : String) + container = @document + @oldtip = tip + @offset = 0 + @column = 0 + @blank = false + @partially_consumed_tab = false + @current_line += 1 + + line = line.gsub(Char::ZERO, '\u{FFFD}') + @line = line + + while (last_child = container.last_child?) && last_child.open? 
+ container = last_child + + find_next_nonspace + + case RULES[container.type].continue(self, container) + when Rule::ContinueStatus::Continue + # we've matched, keep going + when Rule::ContinueStatus::Stop + # we've failed to match a block + # back up to last matching block + container = container.parent + break + when Rule::ContinueStatus::Return + # we've hit end of line for fenced code close and can return + @last_line_length = line.size + return + end + end + + @all_closed = (container == @oldtip) + @last_matched_container = container + + matched_leaf = !container.type.paragraph? && RULES[container.type].accepts_lines? + + while !matched_leaf + find_next_nonspace + + # this is a little performance optimization + unless @indented + first_char = @line[@next_nonspace]? + unless first_char && (Rule::MAYBE_SPECIAL.includes?(first_char) || first_char.ascii_number?) + advance_next_nonspace + break + end + end + + matched = RULES.each_value do |rule| + case rule.match(self, container) + when Rule::MatchValue::Container + container = tip + break true + when Rule::MatchValue::Leaf + container = tip + matched_leaf = true + break true + else + false + end + end + + # nothing matched + unless matched + advance_next_nonspace + break + end + end + + if !@all_closed && !@blank && tip.type.paragraph? + # lazy paragraph continuation + add_line + else + # not a lazy continuation + close_unmatched_blocks + if @blank && (last_child = container.last_child?) + last_child.last_line_blank = true + end + + container_type = container.type + last_line_blank = @blank && + !(container_type.block_quote? || + (container_type.code_block? && container.fenced?) || + (container_type.item? && !container.first_child? && container.source_pos[0][0] == @current_line)) + + cont = container + while cont + cont.last_line_blank = last_line_blank + cont = cont.parent? + end + + if RULES[container_type].accepts_lines? + add_line + + # if HtmlBlock, check for end condition + if (container_type.html_block? 
&& match_html_block?(container)) + token(container, @current_line) + end + elsif @offset < line.size && !@blank + # create paragraph container for line + add_child(Node::Type::Paragraph, @offset) + advance_next_nonspace + add_line + end + + @last_line_length = line.size + end + + nil + end + + private def process_inlines + walker = @document.walker + @inline_lexer.refmap = @refmap + while (event = walker.next) + node, entering = event + if !entering && (node.type.paragraph? || node.type.heading?) + @inline_lexer.parse(node) + end + end + + nil + end + + def token(container : Node, line_number : Int32) + container_parent = container.parent? + + container.open = false + container.source_pos = { + container.source_pos[0], + {line_number, @last_line_length}, + } + RULES[container.type].token(self, container) + + @tip = container_parent + + nil + end + + private def add_line + if @partially_consumed_tab + @offset += 1 # skip over tab + # add space characters + chars_to_tab = Rule::CODE_INDENT - (@column % 4) + tip.text += " " * chars_to_tab + end + + tip.text += @line[@offset..-1] + "\n" + + nil + end + + def add_child(type : Node::Type, offset : Int32) : Node + while !RULES[tip.type].can_contain?(type) + token(tip, @current_line - 1) + end + + column_number = offset + 1 # offset 0 = column 1 + + node = Node.new(type) + node.source_pos = { {@current_line, column_number}, {0, 0} } + node.text = "" + tip.append_child(node) + @tip = node + + node + end + + def close_unmatched_blocks + unless @all_closed + while (oldtip = @oldtip) && oldtip != @last_matched_container + parent = oldtip.parent? + token(oldtip, @current_line - 1) + @oldtip = parent + end + @all_closed = true + end + nil + end + + private def find_next_nonspace + offset = @offset + column = @column + + if @line.empty? + @blank = true + else + while char = @line[offset]? 
+ case char + when ' ' + offset += 1 + column += 1 + when '\t' + offset += 1 + column += (4 - (column % 4)) + else + break + end + end + + @blank = {nil, '\n', '\r'}.includes?(char) + end + + @next_nonspace = offset + @next_nonspace_column = column + @indent = @next_nonspace_column - @column + @indented = @indent >= Rule::CODE_INDENT + + nil + end + + def advance_offset(count, columns = false) + line = @line + while count > 0 && (char = line[@offset]?) + if char == '\t' + chars_to_tab = Rule::CODE_INDENT - (@column % 4) + if columns + @partially_consumed_tab = chars_to_tab > count + chars_to_advance = chars_to_tab > count ? count : chars_to_tab + @column += chars_to_advance + @offset += @partially_consumed_tab ? 0 : 1 + count -= chars_to_advance + else + @partially_consumed_tab = false + @column += chars_to_tab + @offset += 1 + count -= 1 + end + else + @partially_consumed_tab = false + @column += 1 # assume ascii; block starts are ascii + @offset += 1 + count -= 1 + end + end + + nil + end + + def advance_next_nonspace + @offset = @next_nonspace + @column - @next_nonspace_column + @partially_consumed_tab = false + + nil + end + + private def match_html_block?(container : Node) + if block_type = container.data["html_block_type"] + block_type = block_type.as(Int32) + block_type >= 0 && block_type <= 4 && Rule::HTML_BLOCK_CLOSE[block_type].match(@line[@offset..-1]) + else + false + end + end + end +end diff --git a/lib/markd/src/markd/parsers/inline.cr b/lib/markd/src/markd/parsers/inline.cr new file mode 100644 index 000000000000..92592b53dbd6 --- /dev/null +++ b/lib/markd/src/markd/parsers/inline.cr @@ -0,0 +1,847 @@ +require "html" + +module Markd::Parser + class Inline + include Parser + + property refmap + private getter! brackets + + @delimiters : Delimiter? 
+ + def initialize(@options : Options) + @text = "" + @pos = 0 + @refmap = {} of String => Hash(String, String) | String + end + + def parse(node : Node) + @pos = 0 + @delimiters = nil + @text = node.text.strip + + loop do + break unless process_line(node) + end + + node.text = "" + process_emphasis(nil) + end + + private def process_line(node : Node) + char = char_at?(@pos) + + return false unless char && char != Char::ZERO + + res = case char + when '\n' + newline(node) + when '\\' + backslash(node) + when '`' + backtick(node) + when '*', '_' + handle_delim(char, node) + when '\'', '"' + @options.smart && handle_delim(char, node) + when '[' + open_bracket(node) + when '!' + bang(node) + when ']' + close_bracket(node) + when '<' + auto_link(node) || html_tag(node) + when '&' + entity(node) + else + string(node) + end + + unless res + @pos += 1 + node.append_child(text(char)) + end + + true + end + + private def newline(node : Node) + @pos += 1 # assume we're at a \n + last_child = node.last_child? + # check previous node for trailing spaces + if last_child && last_child.type.text? && + last_child.text.ends_with?(' ') + hard_break = if last_child.text.size == 1 + false # Must be space + else + last_child.text[-2]? == ' ' + end + last_child.text = last_child.text.rstrip ' ' + node.append_child(Node.new(hard_break ? Node::Type::LineBreak : Node::Type::SoftBreak)) + else + node.append_child(Node.new(Node::Type::SoftBreak)) + end + + # gobble leading spaces in next line + while char_at?(@pos) == ' ' + @pos += 1 + end + + true + end + + private def backslash(node : Node) + @pos += 1 + + char = @pos < @text.bytesize ? 
char_at(@pos).to_s : nil + child = if char_at?(@pos) == '\n' + @pos += 1 + Node.new(Node::Type::LineBreak) + elsif char && char.match(Rule::ESCAPABLE) + c = text(char) + @pos += 1 + c + else + text("\\") + end + + node.append_child(child) + + true + end + + private def backtick(node : Node) + start_pos = @pos + while char_at?(@pos) == '`' + @pos += 1 + end + return false if start_pos == @pos + + num_ticks = @pos - start_pos + after_open_ticks = @pos + while text = match(Rule::TICKS) + if text.bytesize == num_ticks + child = Node.new(Node::Type::Code) + child.text = @text.byte_slice(after_open_ticks, (@pos - num_ticks) - after_open_ticks).strip.gsub(Rule::WHITESPACE, " ") + node.append_child(child) + + return true + end + end + + @pos = after_open_ticks + node.append_child(text("`" * num_ticks)) + + true + end + + private def bang(node : Node) + start_pos = @pos + @pos += 1 + if char_at?(@pos) == '[' + @pos += 1 + child = text("![") + node.append_child(child) + + add_bracket(child, start_pos + 1, true) + else + node.append_child(text("!")) + end + + true + end + + private def add_bracket(node : Node, index : Int32, image = false) + brackets.bracket_after = true if brackets? + @brackets = Bracket.new(node, @brackets, @delimiters, index, image, true) + end + + private def remove_bracket + @brackets = brackets.previous? 
+ end + + private def open_bracket(node : Node) + start_pos = @pos + @pos += 1 + + child = text("[") + node.append_child(child) + + add_bracket(child, start_pos, false) + + true + end + + private def close_bracket(node : Node) + title = "" + dest = "" + matched = false + @pos += 1 + start_pos = @pos + + # get last [ or ![ + opener = @brackets + unless opener + # no matched opener, just return a literal + node.append_child(text("]")) + return true + end + + unless opener.active + # no matched opener, just return a literal + node.append_child(text("]")) + # take opener off brackets stack + remove_bracket + return true + end + + # If we got here, open is a potential opener + is_image = opener.image + + # Check to see if we have a link/image + save_pos = @pos + + # Inline link? + if char_at?(@pos) == '(' + @pos += 1 + if spnl && (dest = link_destination) && + spnl && (char_at?(@pos - 1).try(&.whitespace?) && + (title = link_title) || true) && spnl && + char_at?(@pos) == ')' + @pos += 1 + matched = true + else + @pos = save_pos + end + end + + ref_label = nil + unless matched + # Next, see if there's a link label + before_label = @pos + label_size = link_label + if label_size > 2 + ref_label = normalize_refernence(@text.byte_slice(before_label, label_size + 1)) + elsif !opener.bracket_after + # Empty or missing second label means to use the first label as the reference. + # The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it. + ref_label = normalize_refernence(@text.byte_slice(opener.index, start_pos - opener.index)) + end + + if label_size == 0 + # If shortcut reference link, rewind before spaces we skipped. + @pos = save_pos + end + + if ref_label && @refmap[ref_label]? + # lookup rawlabel in refmap + link = @refmap[ref_label].as(Hash) + dest = link["destination"] if link["destination"] + title = link["title"] if link["title"] + matched = true + end + end + + if matched + child = Node.new(is_image ? 
Node::Type::Image : Node::Type::Link) + child.data["destination"] = dest + child.data["title"] = title || "" + + tmp = opener.node.next? + while tmp + next_node = tmp.next? + tmp.unlink + child.append_child(tmp) + tmp = next_node + end + + node.append_child(child) + process_emphasis(opener.previous_delimiter) + remove_bracket + opener.node.unlink + + unless is_image + opener = @brackets + while opener + opener.active = false unless opener.image + opener = opener.previous? + end + end + else + remove_bracket + @pos = start_pos + node.append_child(text("]")) + end + + true + end + + private def process_emphasis(delimiter : Delimiter?) + # find first closer above stack_bottom: + closer = @delimiters + while closer + previous = closer.previous? + break if previous == delimiter + closer = previous + end + + if closer + openers_bottom = { + '_' => delimiter, + '*' => delimiter, + '\'' => delimiter, + '"' => delimiter, + } of Char => Delimiter? + + # move forward, looking for closers, and handling each + while closer + closer_char = closer.char + + unless closer.can_close + closer = closer.next? + next + end + + # found emphasis closer. now look back for first matching opener: + opener = closer.previous? + opener_found = false + while opener && opener != delimiter && opener != openers_bottom[closer_char] + odd_match = (closer.can_open || opener.can_close) && + (opener.orig_delims + closer.orig_delims) % 3 == 0 + if opener.char == closer.char && opener.can_open && !odd_match + opener_found = true + break + end + opener = opener.previous? + end + opener = nil unless opener_found + + old_closer = closer + + case closer_char + when '*', '_' + unless opener + closer = closer.next? + else + # calculate actual number of delimiters used from closer + use_delims = (closer.num_delims >= 2 && opener.num_delims >= 2) ? 
2 : 1 + opener_inl = opener.node + closer_inl = closer.node + + # remove used delimiters from stack elts and inlines + opener.num_delims -= use_delims + closer.num_delims -= use_delims + + opener_inl.text = opener_inl.text[0..(-use_delims - 1)] + closer_inl.text = closer_inl.text[0..(-use_delims - 1)] + + # build contents for new emph element + emph = Node.new((use_delims == 1) ? Node::Type::Emphasis : Node::Type::Strong) + + tmp = opener_inl.next? + while tmp && tmp != closer_inl + next_node = tmp.next? + tmp.unlink + emph.append_child(tmp) + tmp = next_node + end + + opener_inl.insert_after(emph) + + # remove elts between opener and closer in delimiters stack + remove_delimiter_between(opener, closer) + + # if opener has 0 delims, remove it and the inline + if opener.num_delims == 0 + opener_inl.unlink + remove_delimiter(opener) + end + + if closer.num_delims == 0 + closer_inl.unlink + tmp_stack = closer.next? + remove_delimiter(closer) + closer = tmp_stack + end + end + when '\'' + closer.node.text = "\u{2019}" + if opener + opener.node.text = "\u{2018}" + end + closer = closer.next? + when '"' + closer.node.text = "\u{201D}" + if opener + opener.node.text = "\u{201C}" + end + closer = closer.next? + else + nil + end + + if !opener && !odd_match + openers_bottom[closer_char] = old_closer.previous? 
+ remove_delimiter(old_closer) if !old_closer.can_open + end + end + end + + # remove all delimiters + while (curr_delimiter = @delimiters) && curr_delimiter != delimiter + remove_delimiter(curr_delimiter) + end + end + + private def auto_link(node : Node) + if text = match(Rule::EMAIL_AUTO_LINK) + node.append_child(link(text, true)) + return true + elsif text = match(Rule::AUTO_LINK) + node.append_child(link(text, false)) + return true + end + + false + end + + private def html_tag(node : Node) + if text = match(Rule::HTML_TAG) + child = Node.new(Node::Type::HTMLInline) + child.text = text + node.append_child(child) + true + else + false + end + end + + private def entity(node : Node) + if char_at?(@pos) == '&' + pos = @pos + 1 + loop do + char = char_at?(pos) + pos += 1 + case char + when ';' + break + when Char::ZERO, nil + return false + else + nil + end + end + text = @text.byte_slice((@pos + 1), (pos - 1) - (@pos + 1)) + decoded_text = HTML.decode_entity text + + node.append_child(text(decoded_text)) + @pos = pos + true + else + false + end + end + + private def string(node : Node) + if text = match_main + if @options.smart + text = text.gsub(Rule::ELLIPSIS, '\u{2026}') + .gsub(Rule::DASH) do |chars| + en_count = em_count = 0 + chars_length = chars.size + + if chars_length % 3 == 0 + em_count = chars_length // 3 + elsif chars_length % 2 == 0 + en_count = chars_length // 2 + elsif chars_length % 3 == 2 + en_count = 1 + em_count = (chars_length - 2) // 3 + else + en_count = 2 + em_count = (chars_length - 4) // 3 + end + + "\u{2014}" * em_count + "\u{2013}" * en_count + end + end + node.append_child(text(text)) + true + else + false + end + end + + private def link(match : String, email = false) : Node + dest = match[1..-2] + destination = email ? 
"mailto:#{dest}" : dest + + node = Node.new(Node::Type::Link) + node.data["title"] = "" + node.data["destination"] = normalize_uri(destination) + node.append_child(text(dest)) + node + end + + private def link_label + text = match(Rule::LINK_LABEL) + if text && text.size <= 1001 && (!text.ends_with?("\\]") || text[-3]? == '\\') + text.bytesize - 1 + else + 0 + end + end + + private def link_title + title = match(Rule::LINK_TITLE) + return unless title + + Utils.decode_entities_string(title[1..-2]) + end + + private def link_destination + dest = if text = match(Rule::LINK_DESTINATION_BRACES) + text[1..-2] + else + save_pos = @pos + open_parens = 0 + while char = char_at?(@pos) + case char + when '\\' + @pos += 1 + @pos += 1 if char_at?(@pos) + when '(' + @pos += 1 + open_parens += 1 + when ')' + break if open_parens < 1 + + @pos += 1 + open_parens -= 1 + when .ascii_whitespace? + break + else + @pos += 1 + end + end + + @text.byte_slice(save_pos, @pos - save_pos) + end + + normalize_uri(Utils.decode_entities_string(dest)) + end + + private def handle_delim(char : Char, node : Node) + res = scan_delims(char) + return false unless res + + num_delims = res[:num_delims] + start_pos = @pos + @pos += num_delims + text = case char + when '\'' + "\u{2019}" + when '"' + "\u{201C}" + else + @text.byte_slice(start_pos, @pos - start_pos) + end + + child = text(text) + node.append_child(child) + + delimiter = Delimiter.new(char, num_delims, num_delims, child, @delimiters, nil, res[:can_open], res[:can_close]) + + if prev = delimiter.previous? + prev.next = delimiter + end + + @delimiters = delimiter + + true + end + + private def remove_delimiter(delimiter : Delimiter) + if prev = delimiter.previous? + prev.next = delimiter.next? + end + + if nxt = delimiter.next? + nxt.previous = delimiter.previous? + else + # top of stack + @delimiters = delimiter.previous? + end + end + + private def remove_delimiter_between(bottom : Delimiter, top : Delimiter) + if bottom.next? 
!= top + bottom.next = top + top.previous = bottom + end + end + + # Counts the run of `char` delimiters starting at @pos (a quote character always counts as a run of one) + # and classifies the run from the characters on either side of it. + # Returns nil when no delimiter is present; otherwise a named tuple with num_delims, can_open and can_close. + # @pos is restored to its starting value before the tuple is returned. + private def scan_delims(char : Char) + num_delims = 0 + start_pos = @pos + if char == '\'' || char == '"' + num_delims += 1 + @pos += 1 + else + while char_at?(@pos) == char + num_delims += 1 + @pos += 1 + end + end + + return if num_delims == 0 + + # Start and end of the text are treated as if a newline were there. + char_before = start_pos == 0 ? '\n' : previous_unicode_char_at(start_pos) + char_after = unicode_char_at?(@pos) || '\n' + + # Match ASCII code 160 => \xA0 (See http://www.adamkoch.com/2009/07/25/white-space-and-character-160/) + # Each flag inspects its own neighbour: the "before" flag must test char_before, not char_after. + after_is_whitespace = char_after.ascii_whitespace? || char_after == '\u00A0' + after_is_punctuation = !!char_after.to_s.match(Rule::PUNCTUATION) + before_is_whitespace = char_before.ascii_whitespace? || char_before == '\u00A0' + before_is_punctuation = !!char_before.to_s.match(Rule::PUNCTUATION) + + left_flanking = !after_is_whitespace && + (!after_is_punctuation || before_is_whitespace || before_is_punctuation) + right_flanking = !before_is_whitespace && + (!before_is_punctuation || after_is_whitespace || after_is_punctuation) + + case char + when '_' + can_open = left_flanking && (!right_flanking || before_is_punctuation) + can_close = right_flanking && (!left_flanking || after_is_punctuation) + when '\'', '"' + can_open = left_flanking && !right_flanking + can_close = right_flanking + else + can_open = left_flanking + can_close = right_flanking + end + + @pos = start_pos + + { + num_delims: num_delims, + can_open: can_open, + can_close: can_close, + } + end + + def reference(text : String, refmap) + @text = text + @pos = 0 + + startpos = @pos + match_chars = link_label + + # label + return 0 if match_chars == 0 + raw_label = @text.byte_slice(0, match_chars + 1) + + # colon + if char_at?(@pos) == ':' + @pos += 1 + else + @pos = startpos + return 0 + end + + # link url + spnl + + dest = link_destination + + if dest.size == 0 + @pos = startpos + return 0 + end + + before_title = @pos + spnl + title = link_title + unless title + 
title = "" + @pos = before_title + end + + at_line_end = true + unless space_at_end_of_line? + if title.empty? + at_line_end = false + else + title = "" + @pos = before_title + at_line_end = space_at_end_of_line? + end + end + + unless at_line_end + @pos = startpos + return 0 + end + + normal_label = normalize_refernence(raw_label) + if normal_label.empty? + @pos = startpos + return 0 + end + + unless refmap[normal_label]? + refmap[normal_label] = { + "destination" => dest, + "title" => title, + } + end + + return @pos - startpos + end + + private def space_at_end_of_line? + while char_at?(@pos) == ' ' + @pos += 1 + end + + case char_at?(@pos) + when '\n' + @pos += 1 + when Char::ZERO + else + return false + end + return true + end + + # Parse zero or more space characters, including at most one newline + private def spnl + seen_newline = false + while c = char_at?(@pos) + if !seen_newline && c == '\n' + seen_newline = true + elsif c != ' ' + break + end + + @pos += 1 + end + + return true + end + + private def match(regex : Regex) : String? + text = @text.byte_slice(@pos) + if match = text.match(regex) + @pos += match.byte_end.not_nil! + return match[0] + end + end + + private def match_main : String? 
+ # This is the same as match(/^[^\n`\[\]\\!<&*_'"]+/m) but done manually (faster) + start_pos = @pos + while (char = char_at?(@pos)) && main_char?(char) + @pos += 1 + end + + if start_pos == @pos + nil + else + @text.byte_slice(start_pos, @pos - start_pos) + end + end + + private def main_char?(char) + case char + when '\n', '`', '[', ']', '\\', '!', '<', '&', '*', '_', '\'', '"' + false + else + true + end + end + + private def text(text) : Node + node = Node.new(Node::Type::Text) + node.text = text.to_s + node + end + + private def char_at?(byte_index) + @text.byte_at?(byte_index).try &.unsafe_chr + end + + private def char_at(byte_index) + @text.byte_at(byte_index).unsafe_chr + end + + private def previous_unicode_char_at(byte_index) + reader = Char::Reader.new(@text, byte_index) + reader.previous_char + end + + private def unicode_char_at?(byte_index) + if byte_index < @text.bytesize + reader = Char::Reader.new(@text, byte_index) + reader.current_char + else + nil + end + end + + # Normalize reference label: drop the first and last character (the enclosing brackets), + # remove leading/trailing whitespace, lowercase, and collapse every internal run of + # spaces, tabs and line endings to a single space. + def normalize_refernence(text : String) + text[1..-2].strip.downcase.gsub(/[ \t\r\n]+/, " ") + end + + private RESERVED_CHARS = ['&', '+', ',', '(', ')', '#', '*', '!', '#', '$', '/', ':', ';', '?', '@', '='] + + def normalize_uri(uri : String) + String.build(capacity: uri.bytesize) do |io| + URI.encode(decode_uri(uri), io) do |byte| + URI.unreserved?(byte) || RESERVED_CHARS.includes?(byte.chr) + end + end + end + + def decode_uri(text : String) + decoded = URI.decode(text) + if decoded.includes?('&') && decoded.includes?(';') + decoded = decoded.gsub(/^&(\w+);$/) { |chars| HTML.decode_entities(chars) } + end + decoded + end + + class Bracket + property node : Node + property! previous : Bracket? + property previous_delimiter : Delimiter? 
+ property index : Int32 + property image : Bool + property active : Bool + property bracket_after : Bool + + def initialize(@node, @previous, @previous_delimiter, @index, @image, @active = true) + @bracket_after = false + end + end + + class Delimiter + property char : Char + property num_delims : Int32 + property orig_delims : Int32 + property node : Node + property! previous : Delimiter? + property! next : Delimiter? + property can_open : Bool + property can_close : Bool + + def initialize(@char, @num_delims, @orig_delims, @node, + @previous, @next, @can_open, @can_close) + end + end + end +end diff --git a/lib/markd/src/markd/renderer.cr b/lib/markd/src/markd/renderer.cr new file mode 100644 index 000000000000..7e65ec1c5ba4 --- /dev/null +++ b/lib/markd/src/markd/renderer.cr @@ -0,0 +1,102 @@ +module Markd + abstract class Renderer + def initialize(@options = Options.new) + @output_io = String::Builder.new + @last_output = "\n" + end + + def output(string : String) + literal(escape(string)) + end + + def literal(string : String) + @output_io << string + @last_output = string + end + + # render a Line Feed character + def newline + literal("\n") if @last_output != "\n" + end + + private ESCAPES = { + '&' => "&", + '"' => """, + '<' => "<", + '>' => ">", + } + + def escape(text) + # If we can determine that the text has no escape chars + # then we can return the text as is, avoiding an allocation + # and a lot of processing in `String#gsub`. 
+ if has_escape_char?(text) + text.gsub(ESCAPES) + else + text + end + end + + private def has_escape_char?(text) + text.each_byte do |byte| + case byte + when '&', '"', '<', '>' + return true + else + next + end + end + false + end + + def render(document : Node) + Utils.timer("renderering", @options.time) do + walker = document.walker + while event = walker.next + node, entering = event + + case node.type + when Node::Type::Heading + heading(node, entering) + when Node::Type::List + list(node, entering) + when Node::Type::Item + item(node, entering) + when Node::Type::BlockQuote + block_quote(node, entering) + when Node::Type::ThematicBreak + thematic_break(node, entering) + when Node::Type::CodeBlock + code_block(node, entering) + when Node::Type::Code + code(node, entering) + when Node::Type::HTMLBlock + html_block(node, entering) + when Node::Type::HTMLInline + html_inline(node, entering) + when Node::Type::Paragraph + paragraph(node, entering) + when Node::Type::Emphasis + emphasis(node, entering) + when Node::Type::SoftBreak + soft_break(node, entering) + when Node::Type::LineBreak + line_break(node, entering) + when Node::Type::Strong + strong(node, entering) + when Node::Type::Link + link(node, entering) + when Node::Type::Image + image(node, entering) + else + text(node, entering) + end + end + end + + @output_io.to_s.sub("\n", "") + end + end +end + +require "./renderers/*" diff --git a/lib/markd/src/markd/renderers/html_renderer.cr b/lib/markd/src/markd/renderers/html_renderer.cr new file mode 100644 index 000000000000..c5f878dbe81a --- /dev/null +++ b/lib/markd/src/markd/renderers/html_renderer.cr @@ -0,0 +1,239 @@ +require "uri" + +module Markd + class HTMLRenderer < Renderer + @disable_tag = 0 + @last_output = "\n" + + HEADINGS = %w(h1 h2 h3 h4 h5 h6) + + def heading(node : Node, entering : Bool) + tag_name = HEADINGS[node.data["level"].as(Int32) - 1] + if entering + newline + tag(tag_name, attrs(node)) + # toc(node) if @options.toc + else + 
tag(tag_name, end_tag: true) + newline + end + end + + def code(node : Node, entering : Bool) + tag("code") do + output(node.text) + end + end + + def code_block(node : Node, entering : Bool) + languages = node.fence_language ? node.fence_language.split : nil + code_tag_attrs = attrs(node) + pre_tag_attrs = if @options.prettyprint + {"class" => "prettyprint"} + else + nil + end + + if languages && languages.size > 0 && (lang = languages[0]) && !lang.empty? + code_tag_attrs ||= {} of String => String + code_tag_attrs["class"] = "language-#{escape(lang.strip)}" + end + + newline + tag("pre", pre_tag_attrs) do + tag("code", code_tag_attrs) do + output(node.text) + end + end + newline + end + + def thematic_break(node : Node, entering : Bool) + newline + tag("hr", attrs(node), self_closing: true) + newline + end + + def block_quote(node : Node, entering : Bool) + newline + if entering + tag("blockquote", attrs(node)) + else + tag("blockquote", end_tag: true) + end + newline + end + + def list(node : Node, entering : Bool) + tag_name = node.data["type"] == "bullet" ? "ul" : "ol" + + newline + if entering + attrs = attrs(node) + + if (start = node.data["start"].as(Int32)) && start != 1 + attrs ||= {} of String => String + attrs["start"] = start.to_s + end + + tag(tag_name, attrs) + else + tag(tag_name, end_tag: true) + end + newline + end + + def item(node : Node, entering : Bool) + if entering + tag("li", attrs(node)) + else + tag("li", end_tag: true) + newline + end + end + + def link(node : Node, entering : Bool) + if entering + attrs = attrs(node) + destination = node.data["destination"].as(String) + + unless @options.safe && potentially_unsafe(destination) + attrs ||= {} of String => String + destination = resolve_uri(destination, node) + attrs["href"] = escape(destination) + end + + if (title = node.data["title"].as(String)) && !title.empty? 
+ attrs ||= {} of String => String + attrs["title"] = escape(title) + end + + tag("a", attrs) + else + tag("a", end_tag: true) + end + end + + private def resolve_uri(destination, node) + base_url = @options.base_url + return destination unless base_url + + uri = URI.parse(destination) + return destination if uri.absolute? + + base_url.resolve(uri).to_s + end + + def image(node : Node, entering : Bool) + if entering + if @disable_tag == 0 + destination = node.data["destination"].as(String) + if @options.safe && potentially_unsafe(destination) + literal(%()) + end + end + end + + def html_block(node : Node, entering : Bool) + newline + content = @options.safe ? "" : node.text + literal(content) + newline + end + + def html_inline(node : Node, entering : Bool) + content = @options.safe ? "" : node.text + literal(content) + end + + def paragraph(node : Node, entering : Bool) + if (grand_parant = node.parent?.try &.parent?) && grand_parant.type.list? + return if grand_parant.data["tight"] + end + + if entering + newline + tag("p", attrs(node)) + else + tag("p", end_tag: true) + newline + end + end + + def emphasis(node : Node, entering : Bool) + tag("em", end_tag: !entering) + end + + def soft_break(node : Node, entering : Bool) + literal("\n") + end + + def line_break(node : Node, entering : Bool) + tag("br", self_closing: true) + newline + end + + def strong(node : Node, entering : Bool) + tag("strong", end_tag: !entering) + end + + def text(node : Node, entering : Bool) + output(node.text) + end + + private def tag(name : String, attrs = nil, self_closing = false, end_tag = false) + return if @disable_tag > 0 + + @output_io << "<" + @output_io << "/" if end_tag + @output_io << name + attrs.try &.each do |key, value| + @output_io << ' ' << key << '=' << '"' << value << '"' + end + + @output_io << " /" if self_closing + @output_io << ">" + @last_output = ">" + end + + private def tag(name : String, attrs = nil) + tag(name, attrs) + yield + tag(name, end_tag: true) + 
end + + private def potentially_unsafe(url : String) + url.match(Rule::UNSAFE_PROTOCOL) && !url.match(Rule::UNSAFE_DATA_PROTOCOL) + end + + private def toc(node : Node) + return unless node.type.heading? + + title = URI.encode(node.text) + + @output_io << %(
) + @last_output = ">" + end + + private def attrs(node : Node) + if @options.source_pos && (pos = node.source_pos) + {"data-source-pos" => "#{pos[0][0]}:#{pos[0][1]}-#{pos[1][0]}:#{pos[1][1]}"} + else + nil + end + end + end +end diff --git a/lib/markd/src/markd/rule.cr b/lib/markd/src/markd/rule.cr new file mode 100644 index 000000000000..0e1611643d30 --- /dev/null +++ b/lib/markd/src/markd/rule.cr @@ -0,0 +1,110 @@ +module Markd + module Rule + ESCAPABLE_STRING = %Q([!"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]) + ESCAPED_CHAR_STRING = %Q(\\\\) + ESCAPABLE_STRING + + TAG_NAME_STRING = %Q([A-Za-z][A-Za-z0-9-]*) + ATTRIBUTE_NAME_STRING = %Q([a-zA-Z_:][a-zA-Z0-9:._-]*) + UNQUOTED_VALUE_STRING = %Q([^"'=<>`\\x00-\\x20]+) + SINGLE_QUOTED_VALUE_STRING = %Q('[^']*') + DOUBLE_QUOTED_VALUE_STRING = %Q("[^"]*") + ATTRIBUTE_VALUE_STRING = "(?:" + UNQUOTED_VALUE_STRING + "|" + SINGLE_QUOTED_VALUE_STRING + "|" + DOUBLE_QUOTED_VALUE_STRING + ")" + ATTRIBUTE_VALUE_SPEC_STRING = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTE_VALUE_STRING + ")" + ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTE_NAME_STRING + ATTRIBUTE_VALUE_SPEC_STRING + "?)" + + MAYBE_SPECIAL = {'#', '`', '~', '*', '+', '_', '=', '<', '>', '-'} + THEMATIC_BREAK = /^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/ + + ESCAPABLE = /^#{ESCAPABLE_STRING}/ + + TICKS = /`+/ + + ELLIPSIS = "..." 
+ DASH = /--+/ + + OPEN_TAG = "<" + TAG_NAME_STRING + ATTRIBUTE + "*" + "\\s*/?>" + CLOSE_TAG = "]" + + OPEN_TAG_STRING = "<#{TAG_NAME_STRING}#{ATTRIBUTE}*" + "\\s*/?>" + CLOSE_TAG_STRING = "]" + COMMENT_STRING = "|" + PROCESSING_INSTRUCTION_STRING = "[<][?].*?[?][>]" + DECLARATION_STRING = "]*>" + CDATA_STRING = "" + HTML_TAG_STRING = "(?:#{OPEN_TAG_STRING}|#{CLOSE_TAG_STRING}|#{COMMENT_STRING}|#{PROCESSING_INSTRUCTION_STRING}|#{DECLARATION_STRING}|#{CDATA_STRING})" + HTML_TAG = /^#{HTML_TAG_STRING}/i + + HTML_BLOCK_OPEN = [ + /^<(?:script|pre|style)(?:\s|>|$)/i, + /^/, + /\?>/, + />/, + /\]\]>/, + ] + + LINK_TITLE = Regex.new("^(?:\"(#{ESCAPED_CHAR_STRING}|[^\"\\x00])*\"" + + "|'(#{ESCAPED_CHAR_STRING}|[^'\\x00])*'" + + "|\\((#{ESCAPED_CHAR_STRING}|[^)\\x00])*\\))") + + LINK_LABEL = Regex.new("^\\[(?:[^\\\\\\[\\]]|" + ESCAPED_CHAR_STRING + "|\\\\){0,}\\]") + + LINK_DESTINATION_BRACES = Regex.new("^(?:[<](?:[^ <>\\t\\n\\\\\\x00]|" + ESCAPED_CHAR_STRING + "|\\\\)*[>])") + + EMAIL_AUTO_LINK = /^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/ + AUTO_LINK = /^<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>/i + + WHITESPACE_CHAR = /^[ \t\n\x0b\x0c\x0d]/ + WHITESPACE = /[ \t\n\x0b\x0c\x0d]+/ + PUNCTUATION = /\p{P}/ # 
Regex.new("[!-#%-\*,-/:;\?@\[-\]_\{\}\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]") + + UNSAFE_PROTOCOL = /^javascript:|vbscript:|file:|data:/i + UNSAFE_DATA_PROTOCOL = /^data:image\/(?:png|gif|jpeg|webp)/i + + CODE_INDENT = 4 + + # Match Value + # + # - None: no match + # - Container: match container, keep going + # - Leaf: match leaf, no more block starts + enum MatchValue + None + Container + 
Leaf + end + + # match and parse + abstract def match(parser : Parser, container : Node) : MatchValue + + # token finalize + abstract def token(parser : Parser, container : Node) : Nil + + # continue + abstract def continue(parser : Parser, container : Node) : ContinueStatus + + enum ContinueStatus + Continue + Stop + Return + end + + # accepts_line + abstract def accepts_lines? : Bool + + private def space_or_tab?(char : Char?) : Bool + char == ' ' || char == '\t' + end + end +end + +require "./rules/*" diff --git a/lib/markd/src/markd/rules/block_quote.cr b/lib/markd/src/markd/rules/block_quote.cr new file mode 100644 index 000000000000..0e74a62cc9dd --- /dev/null +++ b/lib/markd/src/markd/rules/block_quote.cr @@ -0,0 +1,51 @@ +module Markd::Rule + struct BlockQuote + include Rule + + def match(parser : Parser, container : Node) : MatchValue + if match?(parser) + seek(parser) + parser.close_unmatched_blocks + parser.add_child(Node::Type::BlockQuote, parser.next_nonspace) + + MatchValue::Container + else + MatchValue::None + end + end + + def continue(parser : Parser, container : Node) : ContinueStatus + if match?(parser) + seek(parser) + ContinueStatus::Continue + else + ContinueStatus::Stop + end + end + + def token(parser : Parser, container : Node) : Nil + # do nothing + end + + def can_contain?(type : Node::Type) : Bool + !type.item? + end + + def accepts_lines? : Bool + false + end + + private def match?(parser) + !parser.indented && parser.line[parser.next_nonspace]? == '>' + end + + private def seek(parser : Parser) + parser.advance_next_nonspace + parser.advance_offset(1, false) + + if space_or_tab?(parser.line[parser.offset]?) 
+ parser.advance_offset(1, true) + end + end + end +end diff --git a/lib/markd/src/markd/rules/code_block.cr b/lib/markd/src/markd/rules/code_block.cr new file mode 100644 index 000000000000..e42c0ee224e7 --- /dev/null +++ b/lib/markd/src/markd/rules/code_block.cr @@ -0,0 +1,95 @@ +module Markd::Rule + struct CodeBlock + include Rule + + CODE_FENCE = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/ + CLOSING_CODE_FENCE = /^(?:`{3,}|~{3,})(?= *$)/ + + def match(parser : Parser, container : Node) : MatchValue + if !parser.indented && + (match = parser.line[parser.next_nonspace..-1].match(CODE_FENCE)) + # fenced + fence_length = match[0].size + + parser.close_unmatched_blocks + node = parser.add_child(Node::Type::CodeBlock, parser.next_nonspace) + node.fenced = true + node.fence_length = fence_length + node.fence_char = match[0][0].to_s + node.fence_offset = parser.indent + + parser.advance_next_nonspace + parser.advance_offset(fence_length, false) + + MatchValue::Leaf + elsif parser.indented && !parser.blank && (tip = parser.tip) && + !tip.type.paragraph? && + (!container.type.list? || container.data["padding"].as(Int32) >= 4) + # indented + parser.advance_offset(Rule::CODE_INDENT, true) + parser.close_unmatched_blocks + parser.add_child(Node::Type::CodeBlock, parser.offset) + + MatchValue::Leaf + else + MatchValue::None + end + end + + def continue(parser : Parser, container : Node) : ContinueStatus + line = parser.line + indent = parser.indent + if container.fenced? + # fenced + match = indent <= 3 && + line[parser.next_nonspace]? == container.fence_char[0] && + line[parser.next_nonspace..-1].match(CLOSING_CODE_FENCE) + + if match && match.as(Regex::MatchData)[0].size >= container.fence_length + # closing fence - we're at end of line, so we can return + parser.token(container, parser.current_line) + return ContinueStatus::Return + else + # skip optional spaces of fence offset + index = container.fence_offset + while index > 0 && space_or_tab?(parser.line[parser.offset]?) 
+ parser.advance_offset(1, true) + index -= 1 + end + end + else + # indented + if indent >= Rule::CODE_INDENT + parser.advance_offset(Rule::CODE_INDENT, true) + elsif parser.blank + parser.advance_next_nonspace + else + return ContinueStatus::Stop + end + end + + ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + if container.fenced? + # fenced + first_line, _, text = container.text.partition('\n') + + container.fence_language = Utils.decode_entities_string(first_line.strip) + container.text = text + else + # indented + container.text = container.text.gsub(/(\n *)+$/, "\n") + end + end + + def can_contain?(type) + false + end + + def accepts_lines? : Bool + true + end + end +end diff --git a/lib/markd/src/markd/rules/document.cr b/lib/markd/src/markd/rules/document.cr new file mode 100644 index 000000000000..6ea3c2eef074 --- /dev/null +++ b/lib/markd/src/markd/rules/document.cr @@ -0,0 +1,25 @@ +module Markd::Rule + struct Document + include Rule + + def match(parser : Parser, container : Node) : MatchValue + MatchValue::None + end + + def continue(parser : Parser, container : Node) : ContinueStatus + ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + # do nothing + end + + def can_contain?(type : Node::Type) : Bool + !type.item? + end + + def accepts_lines? 
: Bool + false + end + end +end diff --git a/lib/markd/src/markd/rules/heading.cr b/lib/markd/src/markd/rules/heading.cr new file mode 100644 index 000000000000..0419ec215e26 --- /dev/null +++ b/lib/markd/src/markd/rules/heading.cr @@ -0,0 +1,68 @@ +module Markd::Rule + struct Heading + include Rule + + ATX_HEADING_MARKER = /^\#{1,6}(?:[ \t]+|$)/ + SETEXT_HEADING_MARKER = /^(?:=+|-+)[ \t]*$/ + + def match(parser : Parser, container : Node) : MatchValue + if match = match?(parser, ATX_HEADING_MARKER) + # ATX Heading matched + parser.advance_next_nonspace + parser.advance_offset(match[0].size, false) + parser.close_unmatched_blocks + + container = parser.add_child(Node::Type::Heading, parser.next_nonspace) + container.data["level"] = match[0].strip.size + container.text = parser.line[parser.offset..-1] + .sub(/^ *#+ *$/, "") + .sub(/ +#+ *$/, "") + + parser.advance_offset(parser.line.size - parser.offset) + + MatchValue::Leaf + elsif (match = match?(parser, SETEXT_HEADING_MARKER)) && + container.type.paragraph? && (parent = container.parent?) && + !parent.type.block_quote? + # Setext Heading matched + parser.close_unmatched_blocks + heading = Node.new(Node::Type::Heading) + heading.source_pos = container.source_pos + heading.data["level"] = match[0][0] == '=' ? 1 : 2 + heading.text = container.text + + container.insert_after(heading) + container.unlink + + parser.tip = heading + parser.advance_offset(parser.line.size - parser.offset, false) + + MatchValue::Leaf + else + MatchValue::None + end + end + + def token(parser : Parser, container : Node) : Nil + # do nothing + end + + def continue(parser : Parser, container : Node) : ContinueStatus + # a heading can never container > 1 line, so fail to match + ContinueStatus::Stop + end + + def can_contain?(type) + false + end + + def accepts_lines? : Bool + false + end + + private def match?(parser : Parser, regex : Regex) : Regex::MatchData? 
+ match = parser.line[parser.next_nonspace..-1].match(regex) + !parser.indented && match ? match : nil + end + end +end diff --git a/lib/markd/src/markd/rules/html_block.cr b/lib/markd/src/markd/rules/html_block.cr new file mode 100644 index 000000000000..93342d024461 --- /dev/null +++ b/lib/markd/src/markd/rules/html_block.cr @@ -0,0 +1,43 @@ +module Markd::Rule + struct HTMLBlock + include Rule + + def match(parser : Parser, container : Node) : MatchValue + if !parser.indented && parser.line[parser.next_nonspace]? == '<' + text = parser.line[parser.next_nonspace..-1] + block_type_size = Rule::HTML_BLOCK_OPEN.size - 1 + + Rule::HTML_BLOCK_OPEN.each_with_index do |regex, index| + if (text.match(regex) && + (index < block_type_size || !container.type.paragraph?)) + parser.close_unmatched_blocks + # We don't adjust parser.offset; + # spaces are part of the HTML block: + node = parser.add_child(Node::Type::HTMLBlock, parser.offset) + node.data["html_block_type"] = index + + return MatchValue::Leaf + end + end + end + + MatchValue::None + end + + def continue(parser : Parser, container : Node) : ContinueStatus + (parser.blank && {5, 6}.includes?(container.data["html_block_type"])) ? ContinueStatus::Stop : ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + container.text = container.text.gsub(/(\n *)+$/, "") + end + + def can_contain?(type) + false + end + + def accepts_lines? 
: Bool + true + end + end +end diff --git a/lib/markd/src/markd/rules/item.cr b/lib/markd/src/markd/rules/item.cr new file mode 100644 index 000000000000..dac040b001a0 --- /dev/null +++ b/lib/markd/src/markd/rules/item.cr @@ -0,0 +1,41 @@ +module Markd::Rule + struct Item + include Rule + + def match(parser : Parser, container : Node) : MatchValue + # match and parse in Rule::List + MatchValue::None + end + + def continue(parser : Parser, container : Node) : ContinueStatus + indent_offset = container.data["marker_offset"].as(Int32) + container.data["padding"].as(Int32) + + if parser.blank + if container.first_child? + parser.advance_next_nonspace + else + # Blank line after empty list item + return ContinueStatus::Stop + end + elsif parser.indent >= indent_offset + parser.advance_offset(indent_offset, true) + else + return ContinueStatus::Stop + end + + ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + # do nothing + end + + def can_contain?(type : Node::Type) + !type.item? + end + + def accepts_lines? : Bool + false + end + end +end diff --git a/lib/markd/src/markd/rules/list.cr b/lib/markd/src/markd/rules/list.cr new file mode 100644 index 000000000000..0d745f67df11 --- /dev/null +++ b/lib/markd/src/markd/rules/list.cr @@ -0,0 +1,149 @@ +module Markd::Rule + struct List + include Rule + + BULLET_LIST_MARKERS = {'*', '+', '-'} + ORDERED_LIST_MARKERS = {'.', ')'} + + def match(parser : Parser, container : Node) : MatchValue + if (!parser.indented || container.type.list?) + data = parse_list_marker(parser, container) + return MatchValue::None unless data && !data.empty? + + parser.close_unmatched_blocks + if !parser.tip.type.list? 
|| !list_match?(container.data, data) + list_node = parser.add_child(Node::Type::List, parser.next_nonspace) + list_node.data = data + end + + item_node = parser.add_child(Node::Type::Item, parser.next_nonspace) + item_node.data = data + + MatchValue::Container + else + MatchValue::None + end + end + + def continue(parser : Parser, container : Node) : ContinueStatus + ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + item = container.first_child? + while item + if ends_with_blankline?(item) && item.next? + container.data["tight"] = false + break + end + + subitem = item.first_child? + while subitem + if ends_with_blankline?(subitem) && (item.next? || subitem.next?) + container.data["tight"] = false + break + end + + subitem = subitem.next? + end + + item = item.next? + end + end + + def can_contain?(type) + type.item? + end + + def accepts_lines? : Bool + false + end + + private def list_match?(list_data, item_data) + list_data["type"] == item_data["type"] && + list_data["delimiter"] == item_data["delimiter"] && + list_data["bullet_char"] == item_data["bullet_char"] + end + + private def parse_list_marker(parser : Parser, container : Node) : Node::DataType + line = parser.line[parser.next_nonspace..-1] + + empty_data = {} of String => Node::DataValue + data = { + "delimiter" => 0, + "marker_offset" => parser.indent, + "bullet_char" => "", + "tight" => true, # lists are tight by default + "start" => 1, + } of String => Node::DataValue + + if BULLET_LIST_MARKERS.includes?(line[0]) + data["type"] = "bullet" + data["bullet_char"] = line[0].to_s + first_match_size = 1 + else + pos = 0 + while line[pos]?.try &.ascii_number? + pos += 1 + end + number = pos >= 1 ? line[0..pos - 1].to_i : -1 + if pos >= 1 && pos <= 9 && ORDERED_LIST_MARKERS.includes?(line[pos]?) && + (!container.type.paragraph? 
|| number == 1) + data["type"] = "ordered" + data["start"] = number + data["delimiter"] = line[pos].to_s + first_match_size = pos + 1 + else + return empty_data + end + end + + next_char = parser.line[parser.next_nonspace + first_match_size]? + unless next_char.nil? || space_or_tab?(next_char) + return empty_data + end + + if container.type.paragraph? && + parser.line[(parser.next_nonspace + first_match_size)..-1].each_char.all? &.ascii_whitespace? + return empty_data + end + + parser.advance_next_nonspace + parser.advance_offset(first_match_size, true) + spaces_start_column = parser.column + spaces_start_offset = parser.offset + + loop do + parser.advance_offset(1, true) + next_char = parser.line[parser.offset]? + + break unless parser.column - spaces_start_column < 5 && space_or_tab?(next_char) + end + + blank_item = parser.line[parser.offset]?.nil? + spaces_after_marker = parser.column - spaces_start_column + if spaces_after_marker >= 5 || spaces_after_marker < 1 || blank_item + data["padding"] = first_match_size + 1 + parser.column = spaces_start_column + parser.offset = spaces_start_offset + + parser.advance_offset(1, true) if space_or_tab?(parser.line[parser.offset]?) + else + data["padding"] = first_match_size + spaces_after_marker + end + + data + end + + private def ends_with_blankline?(container : Node) : Bool + while container + return true if container.last_line_blank? + + break unless container.type == Node::Type::List || container.type == Node::Type::Item + container = container.last_child? 
+ end + + false + end + end +end diff --git a/lib/markd/src/markd/rules/paragraph.cr b/lib/markd/src/markd/rules/paragraph.cr new file mode 100644 index 000000000000..c843dd553edc --- /dev/null +++ b/lib/markd/src/markd/rules/paragraph.cr @@ -0,0 +1,33 @@ +module Markd::Rule + struct Paragraph + include Rule + + def match(parser : Parser, container : Node) : MatchValue + MatchValue::None + end + + def continue(parser : Parser, container : Node) : ContinueStatus + parser.blank ? ContinueStatus::Stop : ContinueStatus::Continue + end + + def token(parser : Parser, container : Node) : Nil + has_reference_defs = false + + while container.text[0]? == '[' && + (pos = parser.inline_lexer.reference(container.text, parser.refmap)) && pos > 0 + container.text = container.text.byte_slice(pos) + has_reference_defs = true + end + + container.unlink if has_reference_defs && container.text.each_char.all? &.ascii_whitespace? + end + + def can_contain?(type) + false + end + + def accepts_lines? : Bool + true + end + end +end diff --git a/lib/markd/src/markd/rules/thematic_break.cr b/lib/markd/src/markd/rules/thematic_break.cr new file mode 100644 index 000000000000..b1ace1485946 --- /dev/null +++ b/lib/markd/src/markd/rules/thematic_break.cr @@ -0,0 +1,35 @@ +module Markd::Rule + struct ThematicBreak + include Rule + + THEMATIC_BREAK = /^(?:(?:\*[ \t]*){3,}|(?:_[ \t]*){3,}|(?:-[ \t]*){3,})[ \t]*$/ + + def match(parser : Parser, container : Node) : MatchValue + if !parser.indented && parser.line[parser.next_nonspace..-1].match(THEMATIC_BREAK) + parser.close_unmatched_blocks + parser.add_child(Node::Type::ThematicBreak, parser.next_nonspace) + parser.advance_offset(parser.line.size - parser.offset, false) + MatchValue::Leaf + else + MatchValue::None + end + end + + def continue(parser : Parser, container : Node) : ContinueStatus + # a thematic break can never container > 1 line, so fail to match: + ContinueStatus::Stop + end + + def token(parser : Parser, container : Node) : Nil + # 
do nothing + end + + def can_contain?(type) + false + end + + def accepts_lines? : Bool + false + end + end +end diff --git a/lib/markd/src/markd/utils.cr b/lib/markd/src/markd/utils.cr new file mode 100644 index 000000000000..deb40668610e --- /dev/null +++ b/lib/markd/src/markd/utils.cr @@ -0,0 +1,20 @@ +require "json" + +module Markd + module Utils + def self.timer(label : String, measure_time? : Bool) + return yield unless measure_time? + + start_time = Time.utc + yield + + puts "#{label}: #{(Time.utc - start_time).total_milliseconds}ms" + end + + DECODE_ENTITIES_REGEX = Regex.new("\\\\" + Rule::ESCAPABLE_STRING, Regex::Options::IGNORE_CASE) + + def self.decode_entities_string(text : String) : String + HTML.decode_entities(text).gsub(DECODE_ENTITIES_REGEX) { |text| text[1].to_s } + end + end +end diff --git a/lib/markd/src/markd/version.cr b/lib/markd/src/markd/version.cr new file mode 100644 index 000000000000..353bf563d86f --- /dev/null +++ b/lib/markd/src/markd/version.cr @@ -0,0 +1,3 @@ +module Markd + VERSION = "0.4.0" +end diff --git a/man/crystal.1 b/man/crystal.1 index 2c54fee9f60c..541163934fc2 100644 --- a/man/crystal.1 +++ b/man/crystal.1 @@ -234,6 +234,11 @@ Please see .Sm "ENVIRONMENT VARIABLES". .Pp .It +.It Sy CRYSTAL_LIBRARY_PATH +Please see +.Sm "ENVIRONMENT VARIABLES". +.Pp +.It .It Sy CRYSTAL_PATH Please see .Sm "ENVIRONMENT VARIABLES". @@ -384,8 +389,17 @@ Show version. Defines path where Crystal caches partial compilation results for faster subsequent builds. This path is also used to temporarily store executables when Crystal programs are run with 'crystal run' rather than 'crystal build'. .Pp .It +.It Sy CRYSTAL_LIBRARY_PATH +Defines paths where Crystal searches for (binary) libraries. Multiple paths can be separated by ":". +These paths are passed to the linker as `-L` flags. +.Pp +The pattern '$ORIGIN' at the start of the path expands to the directory where the compiler binary is located. 
For example, '$ORIGIN/../lib/crystal' resolves the standard library path relative to the compiler location in a generic way, independent of the absolute paths (assuming the relative location is correct). +.Pp +.It .It Sy CRYSTAL_PATH -Defines paths where Crystal searches for required files. +Defines paths where Crystal searches for required source files. Multiple paths can be separated by ":". +.Pp +The pattern '$ORIGIN' at the start of the path expands to the directory where the compiler binary is located. For example, '$ORIGIN/../share/crystal/src' resolves the standard library path relative to the compiler location in a generic way, independent of the absolute paths (assuming the relative location is correct). .Pp .It .It Sy CRYSTAL_OPTS diff --git a/shell.nix b/shell.nix index 81725591ad23..95c637b69ecc 100644 --- a/shell.nix +++ b/shell.nix @@ -52,18 +52,18 @@ let # Hashes obtained using `nix-prefetch-url --unpack ` latestCrystalBinary = genericBinary ({ x86_64-darwin = { - url = "https://github.com/crystal-lang/crystal/releases/download/1.1.0/crystal-1.1.0-1-darwin-x86_64.tar.gz"; - sha256 = "sha256:0dk893g5v3y11hfmr6viskhajnlippwcs8ra8azxa9rjh47lx8zg"; + url = "https://github.com/crystal-lang/crystal/releases/download/1.1.1/crystal-1.1.1-1-darwin-x86_64.tar.gz"; + sha256 = "sha256:0amx2ggcycg9sqacfiq7b7w00cjras17sh466y3k645r8gp09zby"; }; x86_64-linux = { - url = "https://github.com/crystal-lang/crystal/releases/download/1.1.0/crystal-1.1.0-1-linux-x86_64.tar.gz"; - sha256 = "sha256:1n967087p0km0v4pr7xyl4gg5cfl1zap7kas94gw4cs4a90irwgd"; + url = "https://github.com/crystal-lang/crystal/releases/download/1.1.1/crystal-1.1.1-1-linux-x86_64.tar.gz"; + sha256 = "sha256:13ask28zqvg8vawldy5axc9y1ijsd7ggah5kzmnk7jxhf3z1ambx"; }; i686-linux = { - url = "https://github.com/crystal-lang/crystal/releases/download/1.1.0/crystal-1.1.0-1-linux-i686.tar.gz"; - sha256 = "sha256:06qzhrq4la7fkk1y6nr5kq52gxfnrlbnh9lg7ppbxqglr39ygml3"; + url = 
"https://github.com/crystal-lang/crystal/releases/download/1.1.1/crystal-1.1.1-1-linux-i686.tar.gz"; + sha256 = "sha256:0xvbc4bpw3xc94ln3wx46k4kb4bzz9x7735dd1253ka2cl8idzp3"; }; }.${pkgs.stdenv.system}); diff --git a/spec/compiler/codegen/c_abi/c_abi_x86_64_spec.cr b/spec/compiler/codegen/c_abi/c_abi_x86_64_spec.cr index 84a92e7075f6..5383e1030efa 100644 --- a/spec/compiler/codegen/c_abi/c_abi_x86_64_spec.cr +++ b/spec/compiler/codegen/c_abi/c_abi_x86_64_spec.cr @@ -153,7 +153,12 @@ require "../../../spec_helper" )) str = mod.to_s str.scan(/sret/).size.should eq(2) - str.should contain("sret, i32") # sret goes as first argument + + if LibLLVM::IS_LT_120 + str.should contain("sret, i32") # sret goes as first argument + else + str.should contain("sret(%\"struct.LibFoo::Struct\") %0, i32") # sret goes as first argument + end end end {% end %} diff --git a/spec/compiler/codegen/debug_spec.cr b/spec/compiler/codegen/debug_spec.cr index fcf1c7d71ab9..30046b87b773 100644 --- a/spec/compiler/codegen/debug_spec.cr +++ b/spec/compiler/codegen/debug_spec.cr @@ -188,6 +188,37 @@ describe "Code gen: debug" do @@x end end - ), debug: Crystal::Debug::All) + ), debug: Crystal::Debug::All) + end + + it "stores and restores debug location after jumping to main (2)" do + codegen(%( + module Foo + @@x : Int32 = begin + y = 1 + end + + def self.x + @@x + end + end + + Foo.x + ), debug: Crystal::Debug::All) + end + + it "stores and restores debug location after jumping to main (3)" do + codegen(%( + def raise(exception) + x = uninitialized NoReturn + x + end + + lib LibFoo + $foo : -> + end + + LibFoo.foo = ->{ } + ), debug: Crystal::Debug::All) end end diff --git a/spec/compiler/codegen/generic_class_spec.cr b/spec/compiler/codegen/generic_class_spec.cr index ed9b800fc91b..49baf0b62d33 100644 --- a/spec/compiler/codegen/generic_class_spec.cr +++ b/spec/compiler/codegen/generic_class_spec.cr @@ -142,6 +142,29 @@ describe "Code gen: generic class type" do )).to_i.should eq(1) end + it 
"doesn't run generic instance var initializers in formal superclass's context (#4753)" do + run(%( + class Foo(T) + @foo = T.new + + def foo + @foo + end + end + + class Bar(T) < Foo(T) + end + + class Baz + def baz + 7 + end + end + + Bar(Baz).new.foo.baz + )).to_i.should eq(7) + end + it "codegens static array size after instantiating" do run(%( struct StaticArray(T, N) diff --git a/spec/compiler/codegen/macro_spec.cr b/spec/compiler/codegen/macro_spec.cr index d5c940764302..5fb2567b6bfa 100644 --- a/spec/compiler/codegen/macro_spec.cr +++ b/spec/compiler/codegen/macro_spec.cr @@ -1864,7 +1864,7 @@ describe "Code gen: macro" do end Foo.new.bar - )).to_string.should eq("top_level") + )).to_string.should eq("main") end it "responds correctly to has_constant? with @top_level" do diff --git a/spec/compiler/codegen/primitives_spec.cr b/spec/compiler/codegen/primitives_spec.cr index e1d26fe730c8..38d0a18b0ba0 100644 --- a/spec/compiler/codegen/primitives_spec.cr +++ b/spec/compiler/codegen/primitives_spec.cr @@ -34,6 +34,10 @@ describe "Code gen: primitives" do )).to_i.should eq(1) end + it "skips bounds checking when to_i produces same type" do + run("1.to_i32").to_i.should eq(1) + end + it "codegens char" do run("'a'").to_i.should eq('a'.ord) end diff --git a/spec/compiler/codegen/return_spec.cr b/spec/compiler/codegen/return_spec.cr index ccceac8c31f3..07a2d2243ade 100644 --- a/spec/compiler/codegen/return_spec.cr +++ b/spec/compiler/codegen/return_spec.cr @@ -81,4 +81,26 @@ describe "Code gen: return" do bar )).to_i.should eq(123) end + + it "forms a tuple from multiple return values" do + run(%( + def foo + return 5, 3 + end + + v = foo + v[0] &- v[1] + )).to_i.should eq(2) + end + + it "flattens splats inside multiple return values" do + run(%( + def foo + return 1, *{3, 9}, 27 + end + + v = foo + v[3] &- v[2] + )).to_i.should eq(18) + end end diff --git a/spec/compiler/crystal/tools/doc/doc_renderer_spec.cr b/spec/compiler/crystal/tools/doc/doc_renderer_spec.cr 
index 4938060e24b2..d4f1ff5770df 100644 --- a/spec/compiler/crystal/tools/doc/doc_renderer_spec.cr +++ b/spec/compiler/crystal/tools/doc/doc_renderer_spec.cr @@ -1,25 +1,27 @@ require "../../../spec_helper" private def assert_code_link(obj, before, after = before) - renderer = Doc::Markdown::DocRenderer.new(obj, IO::Memory.new) + renderer = Doc::MarkdDocRenderer.new(obj, Markd::Options.new) renderer.expand_code_links(before).should eq(after) end private def it_renders(context, input, output, file = __FILE__, line = __LINE__) it "renders #{input.inspect}", file, line do - String.build do |io| - c = context - c ||= begin - program = Program.new - generator = Doc::Generator.new(program, [""]) - generator.type(program) - end - Doc::Markdown.parse input, Doc::Markdown::DocRenderer.new(c, io) - end.should eq(output), file: file, line: line + c = context + c ||= begin + program = Program.new + generator = Doc::Generator.new(program, [""]) + generator.type(program) + end + options = Markd::Options.new + document = Markd::Parser.parse(input, options) + renderer = Doc::MarkdDocRenderer.new(c, options) + + renderer.render(document).chomp.should eq(output), file: file, line: line end end -describe Doc::Markdown::DocRenderer do +describe Doc::MarkdDocRenderer do describe "expand_code_links" do program = semantic(" class Base @@ -300,9 +302,32 @@ describe Doc::Markdown::DocRenderer do end end - describe "renders" do + describe "renders code blocks" do it_renders nil, "```crystal\nHello\nWorld\n```", %(
Hello\nWorld
) it_renders nil, "```cr\nHello\nWorld\n```", %(
Hello\nWorld
) it_renders nil, "```\nHello\nWorld\n```", %(
Hello\nWorld
) end + + describe "renders links" do + it_renders nil, "[foo](http://example.com/foo)", %(

foo

) + + program = semantic("class Foo; end", wants_doc: true).program + it_renders Doc::Generator.new(program, [""]).type(program), "[`Foo`](http://example.com/foo)", %(

Foo

) + + it_renders nil, %([filter](https://docs.celestine.dev/Celestine/Meta/Context.html#filter(&block:Celestine::Filter-%3ECelestine::Filter)-instance-method)), + %(

filter

) + end + + describe "renders headline" do + it_renders nil, "## Foo Bar", <<-HTML +

+ Foo Bar

+ HTML + end + + describe "renders html" do + it_renders nil, %(

Foo

), %(

Foo

) + end end diff --git a/spec/compiler/crystal/tools/doc/generator_spec.cr b/spec/compiler/crystal/tools/doc/generator_spec.cr index f659e580e330..e3b7f58c15f9 100644 --- a/spec/compiler/crystal/tools/doc/generator_spec.cr +++ b/spec/compiler/crystal/tools/doc/generator_spec.cr @@ -111,7 +111,7 @@ describe Doc::Generator do a_def = Def.new "foo" a_def.add_annotation(program.types[ann].as(Crystal::AnnotationType), Annotation.new(Crystal::Path.new(ann), ["lorem ipsum".string] of ASTNode)) doc_method = Doc::Method.new generator, doc_type, a_def, false - doc_method.formatted_summary.should eq %(

#{ann.upcase} lorem ipsum

\n\n) + doc_method.formatted_summary.should eq %(

#{ann.upcase} lorem ipsum

) end end @@ -125,7 +125,7 @@ describe Doc::Generator do a_def.doc = "Some Method" a_def.add_annotation(program.types[ann].as(Crystal::AnnotationType), Annotation.new(Crystal::Path.new(ann), ["lorem ipsum".string] of ASTNode)) doc_method = Doc::Method.new generator, doc_type, a_def, false - doc_method.formatted_summary.should eq %(

Some Method

\n\n

#{ann.upcase} lorem ipsum

\n\n) + doc_method.formatted_summary.should eq %(

Some Method

\n

#{ann.upcase} lorem ipsum

) end end end @@ -176,7 +176,7 @@ describe Doc::Generator do a_def = Def.new "foo" a_def.add_annotation(program.types[ann].as(Crystal::AnnotationType), Annotation.new(Crystal::Path.new(ann), ["lorem ipsum".string] of ASTNode)) doc_method = Doc::Method.new generator, doc_type, a_def, false - doc_method.formatted_doc.should eq %(

#{ann.upcase} lorem ipsum

\n\n) + doc_method.formatted_doc.should eq %(

#{ann.upcase} lorem ipsum

) end end @@ -190,7 +190,7 @@ describe Doc::Generator do a_def.doc = "Some Method" a_def.add_annotation(program.types[ann].as(Crystal::AnnotationType), Annotation.new(Crystal::Path.new(ann), ["lorem ipsum".string] of ASTNode)) doc_method = Doc::Method.new generator, doc_type, a_def, false - doc_method.formatted_doc.should eq %(

Some Method

\n\n

#{ann.upcase} lorem ipsum

\n\n) + doc_method.formatted_doc.should eq %(

Some Method

\n

#{ann.upcase} lorem ipsum

) end end end @@ -226,7 +226,7 @@ describe Doc::Generator do a_def = Def.new "foo" a_def.doc = "Some Method\n\nMore Data" doc_method = Doc::Method.new generator, doc_type, a_def, false - doc_method.formatted_doc.should eq %(

Some Method

\n\n

More Data

) + doc_method.formatted_doc.should eq %(

Some Method

\n

More Data

) end end diff --git a/spec/compiler/crystal/tools/doc/markdown_spec.cr b/spec/compiler/crystal/tools/doc/markdown_spec.cr deleted file mode 100644 index 1a05c7a7876d..000000000000 --- a/spec/compiler/crystal/tools/doc/markdown_spec.cr +++ /dev/null @@ -1,139 +0,0 @@ -require "spec" -require "../../../../../src/compiler/crystal/tools/doc/markdown" - -private def assert_render(input, output, file = __FILE__, line = __LINE__) - it "renders #{input.inspect}", file, line do - Crystal::Doc::Markdown.to_html(input).should eq(output), file: file, line: line - end -end - -describe Crystal::Doc::Markdown do - assert_render "", "" - assert_render "Hello", "

Hello

" - assert_render "Hello\nWorld", "

Hello\nWorld

" - assert_render "Hello\n\nWorld", "

Hello

\n\n

World

" - assert_render "Hello\n\n\n\n\nWorld", "

Hello

\n\n

World

" - assert_render "Hello\n \nWorld", "

Hello

\n\n

World

" - assert_render "Hello\nWorld\n\nGood\nBye", "

Hello\nWorld

\n\n

Good\nBye

" - - assert_render "*Hello*", "

Hello

" - assert_render "*Hello", "

*Hello

" - assert_render "*Hello *", "

*Hello *

" - assert_render "*Hello World*", "

Hello World

" - assert_render "これは *みず* です", "

これは みず です

" - - assert_render "**Hello**", "

Hello

" - assert_render "**Hello **", "

**Hello **

" - - assert_render "_Hello_", "

Hello

" - assert_render "_Hello", "

_Hello

" - assert_render "_Hello _", "

_Hello _

" - assert_render "_Hello World_", "

Hello World

" - - assert_render "__Hello__", "

Hello

" - assert_render "__Hello __", "

__Hello __

" - - assert_render "this_is_not_italic", "

this_is_not_italic

" - assert_render "this__is__not__bold", "

this__is__not__bold

" - - assert_render "`Hello`", "

Hello

" - - assert_render "Hello\n=", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "Hello\n===", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "Hello\n===\nWorld", "

#{Crystal::Doc.anchor_link "hello"}Hello

\n\n

World

" - assert_render "Hello\n===World", "

Hello\n===World

" - - assert_render "Hello\n-", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "Hello\n-", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "Hello\n-World", "

Hello\n-World

" - - assert_render "#Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "# Hello ", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "#Hel%^&|>/\\lo", "

#{Crystal::Doc.anchor_link "hello"}Hel%^&|>/\\lo

" - assert_render "# Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "# Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "## Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "## Multi Word Heading", "

#{Crystal::Doc.anchor_link "multi-word-heading"}Multi Word Heading

" - assert_render "## Foo: Bar", "

#{Crystal::Doc.anchor_link "foo-bar"}Foo: Bar

" - assert_render "### Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "#### Hello", "

#{Crystal::Doc.anchor_link "hello"}Hello

" - assert_render "##### Hello", "
#{Crystal::Doc.anchor_link "hello"}Hello
" - assert_render "###### Hello", "
#{Crystal::Doc.anchor_link "hello"}Hello
" - assert_render "####### Hello", "
#{Crystal::Doc.anchor_link "hello"}# Hello
" - - assert_render "# Hello\nWorld", "

#{Crystal::Doc.anchor_link "hello"}Hello

\n\n

World

" - assert_render "# Hello\n---", "

#{Crystal::Doc.anchor_link "hello"}Hello

\n\n
" - - assert_render " Hello", "
Hello
" - assert_render " Hello\n World", "
Hello\nWorld
" - assert_render " Hello\n\n World", "
Hello\n\nWorld
" - assert_render " Hello\n\n \n World", "
Hello\n\n\nWorld
" - assert_render " Hello\n World", "
Hello
\n\n

World

" - assert_render " Hello\n\n\nWorld", "
Hello
\n\n

World

" - - assert_render "```crystal\nHello\nWorld\n```", %(
Hello\nWorld
) - assert_render "```cr\nHello\nWorld\n```", %(
Hello\nWorld
) - assert_render "```\nHello\nWorld\n```", %(
Hello\nWorld
) - assert_render "Hello\n```\nWorld\n```", "

Hello

\n\n
World
" - assert_render "```\n---\n```", "
---
" - assert_render "````\n---\n````", "
---
" - # TODO: this should render as one code block: - assert_render "```invisible man```", "

invisible man

" - - assert_render "> Hello World\n", "
Hello World
" - assert_render "> __Hello World__", "
Hello World
" - assert_render "> This spawns\nmultiple\nlines\n\ntext", "
This spawns\nmultiple\nlines
\n\n

text

" - - assert_render "* Hello", "
  • Hello
" - assert_render "* Hello\n* World", "
  • Hello
  • World
" - assert_render "* Hello\n* World\n * Crystal", "
  • Hello
  • World
    • Crystal
" - assert_render "* Level1\n * Level2\n * Level2\n* Level1", "
  • Level1
    • Level2
    • Level2
  • Level1
" - assert_render "* Level1\n * Level2\n * Level2", "
  • Level1
    • Level2
    • Level2
" - assert_render "* Hello\nWorld", "
  • Hello\nWorld
" - assert_render "Params:\n* Foo\n* Bar", "

Params:

\n\n
  • Foo
  • Bar
" - - assert_render "* Hello\n* World\n\n```\nHello World\n```", "
  • Hello
  • World
\n\n
Hello World
" - assert_render "1. Hello\n2. World\n\n```\nHello World\n```", "
  1. Hello
  2. World
\n\n
Hello World
" - - assert_render "+ Hello", "
  • Hello
" - assert_render "- Hello", "
  • Hello
" - - assert_render "* Hello\n+ World\n- Crystal", "
  • Hello
\n\n
  • World
\n\n
  • Crystal
" - - assert_render "* This spawns\nmultiple\nlines\n\ntext", "
  • This spawns\nmultiple\nlines
\n\n

text

" - assert_render "* Two\nlines\n* This spawns\nmultiple\nlines\n\ntext", "
  • Two\nlines
  • This spawns\nmultiple\nlines
\n\n

text

" - - assert_render "1. Hello", "
  1. Hello
" - assert_render "2. Hello", "
  1. Hello
" - assert_render "01. Hello\n02. World", "
  1. Hello
  2. World
" - assert_render "Params:\n 1. Foo\n 2. Bar", "

Params:

\n\n
  1. Foo
  2. Bar
" - - assert_render "1. This spawns\nmultiple\nlines\n\ntext", "
  1. This spawns\nmultiple\nlines
\n\n

text

" - assert_render "1. Two\nlines\n1. This spawns\nmultiple\nlines\n\ntext", "
  1. Two\nlines
  2. This spawns\nmultiple\nlines
\n\n

text

" - - assert_render "Hello [world](http://example.com)", %(

Hello world

) - assert_render "Hello [world](http://example.com)!", %(

Hello world!

) - assert_render "Hello [world **2**](http://example.com)!", %(

Hello world 2!

) - - assert_render "Hello ![world](http://example.com)", %(

Hello world

) - assert_render "Hello ![world](http://example.com)!", %(

Hello world!

) - - assert_render "[![foo](bar)](baz)", %(

foo

) - - assert_render "This [spawns\nmultiple\nlines](http://example.com)\n\ntext", - %(

This spawns\nmultiple\nlines

\n\n

text

) - - assert_render "***", "
" - assert_render "---", "
" - assert_render "___", "
" - assert_render " * * * ", "
" - - assert_render "hello < world", "

hello < world

" - - assert_render "Hello __[World](http://example.com)__!", %(

Hello World!

) - - # Markdown inside inline code should not be parsed (#7065) - assert_render "`[]()`", %(

[]()

) - assert_render "`*foo*`", %(

*foo*

) - assert_render "`_foo_`", %(

_foo_

) -end diff --git a/spec/compiler/crystal/tools/hierarchy_spec.cr b/spec/compiler/crystal/tools/hierarchy_spec.cr new file mode 100644 index 000000000000..2513807114db --- /dev/null +++ b/spec/compiler/crystal/tools/hierarchy_spec.cr @@ -0,0 +1,67 @@ +require "../../../spec_helper" + +describe Crystal::TextHierarchyPrinter do + it "works" do + program = semantic(%( + class Foo + end + + class Bar < Foo + end + ), inject_primitives: false).program + + output = String.build { |io| Crystal.print_hierarchy(program, io, "ar$", "text") } + output.should eq(<<-EOS) + - class Object (4 bytes) + | + +- class Reference (4 bytes) + | + +- class Foo (4 bytes) + | + +- class Bar (4 bytes)\n + EOS + end +end + +describe Crystal::JSONHierarchyPrinter do + it "works" do + program = semantic(%( + class Foo + end + + class Bar < Foo + end + ), inject_primitives: false).program + + output = String.build { |io| Crystal.print_hierarchy(program, io, "ar$", "json") } + JSON.parse(output).should eq(JSON.parse(<<-EOS)) + { + "name": "Object", + "kind": "class", + "size_in_bytes": 4, + "sub_types": [ + { + "name": "Reference", + "kind": "class", + "size_in_bytes": 4, + "sub_types": [ + { + "name": "Foo", + "kind": "class", + "size_in_bytes": 4, + "sub_types": [ + { + "name": "Bar", + "kind": "class", + "size_in_bytes": 4, + "sub_types": [] + } + ] + } + ] + } + ] + } + EOS + end +end diff --git a/spec/compiler/crystal_path/crystal_path_spec.cr b/spec/compiler/crystal_path/crystal_path_spec.cr index 8e5960dc54e2..058f2ecd156c 100644 --- a/spec/compiler/crystal_path/crystal_path_spec.cr +++ b/spec/compiler/crystal_path/crystal_path_spec.cr @@ -4,7 +4,7 @@ require "spec/helpers/iterate" private def assert_finds(search, results, relative_to = nil, path = __DIR__, file = __FILE__, line = __LINE__) it "finds #{search.inspect}", file, line do - crystal_path = Crystal::CrystalPath.new(path) + crystal_path = Crystal::CrystalPath.new([path]) results = results.map { |result| ::Path[__DIR__, 
result].normalize.to_s } Dir.cd(__DIR__) do matches = crystal_path.find search, relative_to: relative_to @@ -15,7 +15,7 @@ end private def assert_doesnt_find(search, relative_to = nil, path = __DIR__, expected_relative_to = nil, file = __FILE__, line = __LINE__) it "doesn't finds #{search.inspect}", file, line do - crystal_path = Crystal::CrystalPath.new(path) + crystal_path = Crystal::CrystalPath.new([path]) Dir.cd(__DIR__) do error = expect_raises Crystal::CrystalPath::NotFoundError do crystal_path.find search, relative_to: relative_to @@ -188,4 +188,24 @@ describe Crystal::CrystalPath do crystal_path.entries.should eq(%w(foo bar)) end end + + it ".expand_paths" do + paths = ["$ORIGIN/../foo"] + Crystal::CrystalPath.expand_paths(paths, "/usr/bin/") + paths.should eq ["/usr/bin/../foo"] + paths = ["./$ORIGIN/../foo"] + Crystal::CrystalPath.expand_paths(paths, "/usr/bin/") + paths.should eq ["./$ORIGIN/../foo"] + paths = ["$ORIGINfoo"] + Crystal::CrystalPath.expand_paths(paths, "/usr/bin/") + paths.should eq ["$ORIGINfoo"] + paths = ["lib", "$ORIGIN/../foo"] + Crystal::CrystalPath.expand_paths(paths, "/usr/bin/") + paths.should eq ["lib", "/usr/bin/../foo"] + + paths = ["$ORIGIN/../foo"] + expect_raises(Exception, "Missing executable path to expand $ORIGIN path") do + Crystal::CrystalPath.expand_paths(paths, nil) + end + end end diff --git a/spec/compiler/formatter/formatter_spec.cr b/spec/compiler/formatter/formatter_spec.cr index 545f2be3701d..e8a6f0b2b447 100644 --- a/spec/compiler/formatter/formatter_spec.cr +++ b/spec/compiler/formatter/formatter_spec.cr @@ -342,7 +342,20 @@ describe Crystal::Formatter do assert_format "#{keyword} 1", "#{keyword} 1" assert_format "#{keyword}( 1 , 2 )", "#{keyword}(1, 2)" assert_format "#{keyword} 1 , 2", "#{keyword} 1, 2" - assert_format "#{keyword} { 1 , 2 }", "#{keyword} {1, 2}" unless keyword == "yield" + assert_format "#{keyword} *1", "#{keyword} *1" + assert_format "#{keyword} 1 , *2", "#{keyword} 1, *2" + assert_format 
"#{keyword} *1 ,2", "#{keyword} *1, 2" + assert_format "#{keyword} *1 , *2", "#{keyword} *1, *2" + assert_format "#{keyword}( *1 , *2 )", "#{keyword}(*1, *2)" + + unless keyword == "yield" + assert_format "#{keyword} { 1 , 2 }", "#{keyword} {1, 2}" + assert_format "#{keyword} {1, 2}, 3" + assert_format "#{keyword} 1, {2, 3}" + assert_format "#{keyword} {1, 2}, {3, 4}" + assert_format "#{keyword} { {1, 2}, {3, 4} }" + assert_format "#{keyword} { {1, 2}, {3, 4} }, 5" + end end assert_format "yield 1\n2", "yield 1\n2" diff --git a/spec/compiler/lexer/lexer_spec.cr b/spec/compiler/lexer/lexer_spec.cr index 6a858baeaae8..612e16512da7 100644 --- a/spec/compiler/lexer/lexer_spec.cr +++ b/spec/compiler/lexer/lexer_spec.cr @@ -321,6 +321,9 @@ describe "Lexer" do assert_syntax_error "4F64", "unexpected token: F64" assert_syntax_error "0F32", "unexpected token: F32" + assert_syntax_error ".42", ".1 style number literal is not supported, put 0 before dot" + assert_syntax_error "-.42", ".1 style number literal is not supported, put 0 before dot" + it "lexes not instance var" do lexer = Lexer.new "!@foo" token = lexer.next_token diff --git a/spec/compiler/parser/parser_spec.cr b/spec/compiler/parser/parser_spec.cr index ef91531cd201..ecf628d086f9 100644 --- a/spec/compiler/parser/parser_spec.cr +++ b/spec/compiler/parser/parser_spec.cr @@ -707,6 +707,11 @@ module Crystal it_parses "#{keyword} 1 if true", If.new(true.bool, klass.new(1.int32)) it_parses "#{keyword} if true", If.new(true.bool, klass.new) + it_parses "#{keyword} *1", klass.new(TupleLiteral.new([1.int32.splat] of ASTNode)) + it_parses "#{keyword} *1, 2", klass.new(TupleLiteral.new([1.int32.splat, 2.int32])) + it_parses "#{keyword} 1, *2", klass.new(TupleLiteral.new([1.int32, 2.int32.splat])) + it_parses "#{keyword} *{1, 2}", klass.new(TupleLiteral.new([TupleLiteral.new([1.int32, 2.int32] of ASTNode).splat] of ASTNode)) + assert_syntax_error "a = #{keyword}", "void value expression" assert_syntax_error "a = 1; a += 
#{keyword}", "void value expression" assert_syntax_error "yield #{keyword}", "void value expression" @@ -927,6 +932,9 @@ module Crystal it_parses "macro foo\neenum\nend", Macro.new("foo", body: Expressions.from(["eenum\n".macro_literal] of ASTNode)) it_parses "macro foo\n'\\''\nend", Macro.new("foo", body: Expressions.from(["'\\''\n".macro_literal] of ASTNode)) + it_parses "macro foo\n'\\\\'\nend", Macro.new("foo", body: Expressions.from(["'\\\\'\n".macro_literal] of ASTNode)) + it_parses %(macro foo\n"\\'"\nend), Macro.new("foo", body: Expressions.from([%("\\'"\n).macro_literal] of ASTNode)) + it_parses %(macro foo\n"\\\\"\nend), Macro.new("foo", body: Expressions.from([%("\\\\"\n).macro_literal] of ASTNode)) assert_syntax_error "macro foo; {% foo = 1 }; end" assert_syntax_error "macro def foo : String; 1; end" diff --git a/spec/compiler/semantic/generic_class_spec.cr b/spec/compiler/semantic/generic_class_spec.cr index b85e0cf76b98..7c1750aa6cc5 100644 --- a/spec/compiler/semantic/generic_class_spec.cr +++ b/spec/compiler/semantic/generic_class_spec.cr @@ -123,6 +123,29 @@ describe "Semantic: generic class" do )) { int32 } end + it "doesn't compute generic instance var initializers in formal superclass's context (#4753)" do + assert_type(%( + class Foo(T) + @foo = T.new + + def foo + @foo + end + end + + class Bar(T) < Foo(T) + end + + class Baz + def baz + 1 + end + end + + Bar(Baz).new.foo.baz + ), inject_primitives: false) { int32 } + end + it "inherits non-generic to generic (1)" do assert_type(%( class Foo(T) @@ -1221,4 +1244,23 @@ describe "Semantic: generic class" do foo(Gen(Int32).new) )) { generic_class "Gen", int32 } end + + it "replaces type parameters in virtual metaclasses (#10691)" do + assert_type(%( + class Parent(T) + end + + class Child < Parent(Int32) + end + + class Foo(T) + end + + class Bar(T) + @foo = Foo(Parent(T).class).new + end + + Bar(Int32).new.@foo + ), inject_primitives: false) { generic_class("Foo", generic_class("Parent", 
int32).virtual_type.metaclass) } + end end diff --git a/spec/compiler/semantic/module_spec.cr b/spec/compiler/semantic/module_spec.cr index c85a0b777be9..56252bc36585 100644 --- a/spec/compiler/semantic/module_spec.cr +++ b/spec/compiler/semantic/module_spec.cr @@ -1419,18 +1419,122 @@ describe "Semantic: module" do it "errors when extending module that defines instance vars (#4065)" do assert_error %( module Foo - @foo : Int32? + @x = 0 + end - def foo - @foo - end + module Bar + extend Foo end + ), + "can't declare instance variables in Foo because Bar extends it" + end - class Bar + it "errors when extending module that defines instance vars (2) (#4065)" do + assert_error %( + module Foo + @x : Int32? + end + + module Bar extend Foo end ), - "can't declare instance variables in Bar.class" + "can't declare instance variables in Foo because Bar extends it" + end + + it "errors when extending generic module that defines instance vars" do + assert_error %( + module Foo(T) + @x = 0 + end + + module Bar(T) + extend Foo(T) + end + ), + "can't declare instance variables in Foo(T) because Bar(T) extends it" + end + + it "errors when extending generic module that defines instance vars (2)" do + assert_error %( + module Foo(T) + @x : T? + end + + module Bar(T) + extend Foo(T) + end + ), + "can't declare instance variables in Foo(T) because Bar(T) extends it" + end + + it "errors when recursively extending module that defines instance vars" do + assert_error %( + module Foo + @x = 0 + end + + module Bar + include Foo + end + + module Baz + extend Bar + end + ), + "can't declare instance variables in Foo because Baz extends it" + end + + it "errors when recursively extending module that defines instance vars (2)" do + assert_error %( + module Foo + @x : Int32? 
+ end + + module Bar + include Foo + end + + module Baz + extend Bar + end + ), + "can't declare instance variables in Foo because Baz extends it" + end + + it "errors when extending self and self defines instance vars (#9568)" do + assert_error %( + module Foo + extend self + + @x = 0 + end + ), + "can't declare instance variables in Foo because Foo extends it" + end + + it "errors when extending self and self defines instance vars (2) (#9568)" do + assert_error %( + module Foo + extend self + + @x : Int32? + end + ), + "can't declare instance variables in Foo because Foo extends it" + end + + it "errors when extending self and self defines instance vars (3) (#9568)" do + assert_error %( + module Foo + extend self + + def initialize + @x = 0 + end + end + ), + "can't declare instance variables in Foo because Foo extends it" end it "can't pass module class to virtual metaclass (#6113)" do diff --git a/spec/compiler/semantic/return_spec.cr b/spec/compiler/semantic/return_spec.cr index ec9728dde0cc..8f14687fed15 100644 --- a/spec/compiler/semantic/return_spec.cr +++ b/spec/compiler/semantic/return_spec.cr @@ -181,4 +181,12 @@ describe "Semantic: return" do z )) { float64 } end + + it "forms a tuple from multiple return values" do + assert_type("def foo; return 1, 1.0; end; foo") { tuple_of([int32, float64]) } + end + + it "flattens splats inside multiple return values" do + assert_type("def foo; return 1, *{1.0, 'a'}, true; end; foo") { tuple_of([int32, float64, char, bool]) } + end end diff --git a/spec/compiler/semantic/splat_spec.cr b/spec/compiler/semantic/splat_spec.cr index 5430cd3ef0a2..152f8897a30e 100644 --- a/spec/compiler/semantic/splat_spec.cr +++ b/spec/compiler/semantic/splat_spec.cr @@ -54,13 +54,24 @@ describe "Semantic: splat" do "not yet supported" end - it "errors if splatting non-tuple type" do + it "errors if splatting non-tuple type in call arguments" do assert_error %( foo *1 ), "argument to splat must be a tuple, not Int32" end + it "errors if 
splatting non-tuple type in return values" do + assert_error %( + def foo + return *1 + end + + foo + ), + "argument to splat must be a tuple, not Int32" + end + it "forwards tuple with an extra argument" do assert_type(%( def foo(*args) @@ -76,6 +87,16 @@ describe "Semantic: splat" do )) { tuple_of [int32] of TypeVar } end + it "forwards tuple in return statement" do + assert_type(%( + def foo(*args) + return args, *args + end + + foo 1, 'a' + )) { tuple_of([tuple_of([int32, char]), int32, char]) } + end + it "can splat after type filter left it as a tuple (#442)" do assert_type(%( def output(x, y) diff --git a/spec/std/array_spec.cr b/spec/std/array_spec.cr index 5d28004fcfae..9c78e7f26bdc 100644 --- a/spec/std/array_spec.cr +++ b/spec/std/array_spec.cr @@ -696,53 +696,73 @@ describe "Array" do a.equals?(d, &f).should be_false end - describe "fill" do + describe "#fill" do it "replaces all values" do a = ['a', 'b', 'c'] expected = ['x', 'x', 'x'] - a.fill('x').should eq(expected) + a.fill('x').should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [0, 0, 0] - a.fill(0).should eq(expected) + a.fill(0).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] - expected = [0, 0, 0] - a.fill(0).should eq(expected) + expected = [0.0, 0.0, 0.0] + a.fill(0.0).should be(a) + a.should eq(expected) end it "replaces only values between index and size" do a = ['a', 'b', 'c'] expected = ['x', 'x', 'c'] - a.fill('x', 0, 2).should eq(expected) + a.fill('x', 0, 2).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [0, 0, 3] - a.fill(0, 0, 2).should eq(expected) + a.fill(0, 0, 2).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [0, 0, 3] - a.fill(0, 0, 2).should eq(expected) + a.fill(0, 0, 2).should be(a) + a.should eq(expected) end it "replaces only values between index and size (2)" do a = ['a', 'b', 'c'] expected = ['a', 'x', 'x'] - a.fill('x', 1, 2).should eq(expected) + a.fill('x', 1, 2).should be(a) + a.should eq(expected) + a = 
[1, 2, 3] expected = [1, 0, 0] - a.fill(0, 1, 2).should eq(expected) + a.fill(0, 1, 2).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [1, 0, 0] - a.fill(0, 1, 2).should eq(expected) + a.fill(0, 1, 2).should be(a) + a.should eq(expected) end it "replaces all values from index onwards" do a = ['a', 'b', 'c'] expected = ['a', 'x', 'x'] - a.fill('x', -2).should eq(expected) + a.fill('x', -2).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [1, 0, 0] - a.fill(0, -2).should eq(expected) + a.fill(0, -2).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [1, 0, 0] - a.fill(0, -2).should eq(expected) + a.fill(0, -2).should be(a) + a.should eq(expected) end it "raises when given big negative number (#4539)" do @@ -760,13 +780,18 @@ describe "Array" do it "replaces only values between negative index and size" do a = ['a', 'b', 'c'] expected = ['a', 'b', 'x'] - a.fill('x', -1, 1).should eq(expected) + a.fill('x', -1, 1).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [1, 2, 0] - a.fill(0, -1, 1).should eq(expected) + a.fill(0, -1, 1).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [1, 2, 0] - a.fill(0, -1, 1).should eq(expected) + a.fill(0, -1, 1).should be(a) + a.should eq(expected) end it "raises when given big negative number in from/count (#4539)" do @@ -784,37 +809,52 @@ describe "Array" do it "replaces only values in range" do a = ['a', 'b', 'c'] expected = ['x', 'x', 'c'] - a.fill('x', -3..1).should eq(expected) + a.fill('x', -3..1).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [0, 0, 3] - a.fill(0, -3..1).should eq(expected) + a.fill(0, -3..1).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [0, 0, 3] - a.fill(0, -3..1).should eq(expected) + a.fill(0, -3..1).should be(a) + a.should eq(expected) end it "replaces only values in range without end" do a = ['a', 'b', 'c'] expected = ['a', 'x', 'x'] - a.fill('x', 1..nil).should eq(expected) + 
a.fill('x', 1..nil).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [1, 0, 0] - a.fill(0, 1..nil).should eq(expected) + a.fill(0, 1..nil).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [1, 0, 0] - a.fill(0, 1..nil).should eq(expected) + a.fill(0, 1..nil).should be(a) + a.should eq(expected) end it "replaces only values in range begin" do a = ['a', 'b', 'c'] expected = ['x', 'x', 'c'] - a.fill('x', nil..1).should eq(expected) + a.fill('x', nil..1).should be(a) + a.should eq(expected) + a = [1, 2, 3] expected = [0, 0, 3] - a.fill(0, nil..1).should eq(expected) + a.fill(0, nil..1).should be(a) + a.should eq(expected) + a = [1.0, 2.0, 3.0] expected = [0, 0, 3] - a.fill(0, nil..1).should eq(expected) + a.fill(0, nil..1).should be(a) + a.should eq(expected) end it "works with a block" do @@ -1271,128 +1311,100 @@ describe "Array" do end describe "sort" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sort without block" do + {% for sort in ["sort".id, "unstable_sort".id] %} + describe {{ "##{sort}" }} do + it "without block" do a = [3, 4, 1, 2, 5, 6] - b = a.sort(stable: stable) + b = a.{{ sort }} b.should eq([1, 2, 3, 4, 5, 6]) a.should_not eq(b) end - it "sort with a block" do + it "with a block" do a = ["foo", "a", "hello"] - b = a.sort(stable: stable) { |x, y| x.size <=> y.size } + b = a.{{ sort }} { |x, y| x.size <=> y.size } b.should eq(["a", "foo", "hello"]) a.should_not eq(b) end - end - end - - it "stable sort without block" do - is_stable_sort(mutable: false, &.sort(stable: true)) - end - it "stable sort with a block" do - is_stable_sort(mutable: false, &.sort(stable: true) { |a, b| a.value <=> b.value }) - end - - it "default is stable (without block)" do - is_stable_sort(mutable: false, &.sort) - end + {% if sort == "sort" %} + it "stable sort without a block" do + is_stable_sort(mutable: false, &.sort) + end - it "default is stable (with a block)" do - is_stable_sort(mutable: false, &.sort { 
|a, b| a.value <=> b.value }) - end - end + it "stable sort with a block" do + is_stable_sort(mutable: false, &.sort { |a, b| a.value <=> b.value }) + end + {% end %} + end - describe "sort!" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sort! without block" do + describe {{ "##{sort}!" }} do + it "without block" do a = [3, 4, 1, 2, 5, 6] - a.sort!(stable: stable) + a.{{ sort.id }}! a.should eq([1, 2, 3, 4, 5, 6]) end - it "sort! with a block" do + it "with a block" do a = ["foo", "a", "hello"] - a.sort!(stable: stable) { |x, y| x.size <=> y.size } + a.{{ sort.id }}! { |x, y| x.size <=> y.size } a.should eq(["a", "foo", "hello"]) end - end - end - - it "stable sort! without block" do - is_stable_sort(mutable: true, &.sort!(stable: true)) - end - - it "stable sort! with a block" do - is_stable_sort(mutable: true, &.sort!(stable: true) { |a, b| a.value <=> b.value }) - end - it "default is stable (without block)" do - is_stable_sort(mutable: true, &.sort!) - end + {% if sort == "sort" %} + it "stable sort without a block" do + is_stable_sort(mutable: true, &.sort!) + end - it "default is stable (with a block)" do - is_stable_sort(mutable: true, &.sort! { |a, b| a.value <=> b.value }) - end - end + it "stable sort with a block" do + is_stable_sort(mutable: true, &.sort! 
{ |a, b| a.value <=> b.value }) + end + {% end %} + end - describe "sort_by" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sorts by" do + describe {{ "##{sort}_by" }} do + it "sorts" do a = ["foo", "a", "hello"] - b = a.sort_by(stable: stable, &.size) + b = a.{{ sort }}_by(&.size) b.should eq(["a", "foo", "hello"]) a.should_not eq(b) end it "unpacks tuple" do a = [{"d", 4}, {"a", 1}, {"c", 3}, {"e", 5}, {"b", 2}] - b = a.sort_by(stable: stable) { |x, y| y } + b = a.{{ sort }}_by { |x, y| y } b.should eq([{"a", 1}, {"b", 2}, {"c", 3}, {"d", 4}, {"e", 5}]) a.should_not eq(b) end - end - end - - it "stable sort by" do - is_stable_sort(mutable: false, &.sort_by(stable: true, &.value)) - end - it "default is stable" do - is_stable_sort(mutable: false, &.sort_by(&.value)) - end - end + {% if sort == "sort" %} + it "stable sort" do + is_stable_sort(mutable: false, &.sort_by(&.value)) + end + {% end %} + end - describe "sort_by!" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sorts by!" do + describe {{ "##{sort}_by!" }} do + it "sorts" do a = ["foo", "a", "hello"] - a.sort_by!(stable: stable, &.size) + a.{{ sort }}_by!(&.size) a.should eq(["a", "foo", "hello"]) end it "calls given block exactly once for each element" do calls = Hash(String, Int32).new(0) a = ["foo", "a", "hello"] - a.sort_by!(stable: stable) { |e| calls[e] += 1; e.size } + a.{{ sort }}_by! { |e| calls[e] += 1; e.size } calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) end - end - end - it "stable sort by!" 
do - is_stable_sort(mutable: true, &.sort_by!(stable: true, &.value)) - end - - it "default is stable" do - is_stable_sort(mutable: true, &.sort_by!(&.value)) - end + {% if sort == "sort" %} + it "stable sort" do + is_stable_sort(mutable: true, &.sort_by!(&.value)) + end + {% end %} + end + {% end %} end describe "swap" do @@ -1952,6 +1964,16 @@ describe "Array" do expect_raises(IndexError) { [[1], [1, 2]].transpose } expect_raises(IndexError) { [[1, 2], [1]].transpose } end + + it "transposes array of tuples" do + [{1, 1.0}].transpose.should eq([[1], [1.0]]) + [{1}, {1.0}].transpose.should eq([[1, 1.0]]) + [{1, 1.0}, {'a', "aaa"}].transpose.should eq([[1, 'a'], [1.0, "aaa"]]) + + typeof([{1, 1.0}].transpose).should eq(Array(Array(Int32 | Float64))) + typeof([{1}, {1.0}].transpose).should eq(Array(Array(Int32 | Float64))) + typeof([{1, 1.0}, {'a', "aaa"}].transpose).should eq(Array(Array(String | Int32 | Float64 | Char))) + end end describe "rotate" do diff --git a/spec/std/big/big_decimal_spec.cr b/spec/std/big/big_decimal_spec.cr index 4b211ef80940..05957fa6ece8 100644 --- a/spec/std/big/big_decimal_spec.cr +++ b/spec/std/big/big_decimal_spec.cr @@ -318,6 +318,7 @@ describe BigDecimal do BigDecimal.new(0).to_s.should eq "0" BigDecimal.new(1).to_s.should eq "1" BigDecimal.new(-1).to_s.should eq "-1" + BigDecimal.new("8.5").to_s.should eq "8.5" BigDecimal.new("-0.35").to_s.should eq "-0.35" BigDecimal.new("-.35").to_s.should eq "-0.35" BigDecimal.new("0.01").to_s.should eq "0.01" diff --git a/spec/std/big/big_int_spec.cr b/spec/std/big/big_int_spec.cr index d653f75523ac..92a483c69962 100644 --- a/spec/std/big/big_int_spec.cr +++ b/spec/std/big/big_int_spec.cr @@ -1,6 +1,13 @@ require "spec" require "big" +private def it_converts_to_s(num, str, *, file = __FILE__, line = __LINE__, **opts) + it file: file, line: line do + num.to_s(**opts).should eq(str), file: file, line: line + String.build { |io| num.to_s(io, **opts) }.should eq(str), file: file, line: line + end 
+end + describe "BigInt" do it "creates with a value of zero" do BigInt.new.to_s.should eq("0") @@ -328,14 +335,102 @@ describe "BigInt" do result.to_s.should eq("10715086071862673209484250490600018105614048117055336074437503883703510511249361224931983788156958581275946729175531468251871452856923140435984577574698574803934567774824230985421074605062371141877954182153046474983581941267398767559165543946077062914571196477686542167660429831652624386837205668069376") end - it "does to_s in the given base" do - a = BigInt.new("1234567890123456789") - b = "1000100100010000100001111010001111101111010011000000100010101" - c = "112210f47de98115" - d = "128gguhuuj08l" - a.to_s(2).should eq(b) - a.to_s(16).should eq(c) - a.to_s(32).should eq(d) + describe "#to_s" do + context "base and upcase parameters" do + a = BigInt.new("1234567890123456789") + it_converts_to_s a, "1000100100010000100001111010001111101111010011000000100010101", base: 2 + it_converts_to_s a, "112210f47de98115", base: 16 + it_converts_to_s a, "112210F47DE98115", base: 16, upcase: true + it_converts_to_s a, "128gguhuuj08l", base: 32 + it_converts_to_s a, "128GGUHUUJ08L", base: 32, upcase: true + it_converts_to_s a, "1tckI1NfUnH", base: 62 + + # ensure case is same as for primitive integers + it_converts_to_s 10.to_big_i, 10.to_s(62), base: 62 + + it_converts_to_s (-a), "-1000100100010000100001111010001111101111010011000000100010101", base: 2 + it_converts_to_s (-a), "-112210f47de98115", base: 16 + it_converts_to_s (-a), "-112210F47DE98115", base: 16, upcase: true + it_converts_to_s (-a), "-128gguhuuj08l", base: 32 + it_converts_to_s (-a), "-128GGUHUUJ08L", base: 32, upcase: true + it_converts_to_s (-a), "-1tckI1NfUnH", base: 62 + + it_converts_to_s 16.to_big_i ** 1000, "1#{"0" * 1000}", base: 16 + + it "raises on base 1" do + expect_raises(ArgumentError, "Invalid base 1") { a.to_s(1) } + expect_raises(ArgumentError, "Invalid base 1") { a.to_s(IO::Memory.new, 1) } + end + + it "raises on base 37" do + 
expect_raises(ArgumentError, "Invalid base 37") { a.to_s(37) } + expect_raises(ArgumentError, "Invalid base 37") { a.to_s(IO::Memory.new, 37) } + end + + it "raises on base 62 with upcase" do + expect_raises(ArgumentError, "upcase must be false for base 62") { a.to_s(62, upcase: true) } + expect_raises(ArgumentError, "upcase must be false for base 62") { a.to_s(IO::Memory.new, 62, upcase: true) } + end + end + + context "precision parameter" do + it_converts_to_s 0.to_big_i, "", precision: 0 + it_converts_to_s 0.to_big_i, "0", precision: 1 + it_converts_to_s 0.to_big_i, "00", precision: 2 + it_converts_to_s 0.to_big_i, "00000", precision: 5 + it_converts_to_s 0.to_big_i, "0" * 200, precision: 200 + + it_converts_to_s 1.to_big_i, "1", precision: 0 + it_converts_to_s 1.to_big_i, "1", precision: 1 + it_converts_to_s 1.to_big_i, "01", precision: 2 + it_converts_to_s 1.to_big_i, "00001", precision: 5 + it_converts_to_s 1.to_big_i, "#{"0" * 199}1", precision: 200 + + it_converts_to_s 2.to_big_i, "2", precision: 0 + it_converts_to_s 2.to_big_i, "2", precision: 1 + it_converts_to_s 2.to_big_i, "02", precision: 2 + it_converts_to_s 2.to_big_i, "00002", precision: 5 + it_converts_to_s 2.to_big_i, "#{"0" * 199}2", precision: 200 + + it_converts_to_s (-1).to_big_i, "-1", precision: 0 + it_converts_to_s (-1).to_big_i, "-1", precision: 1 + it_converts_to_s (-1).to_big_i, "-01", precision: 2 + it_converts_to_s (-1).to_big_i, "-00001", precision: 5 + it_converts_to_s (-1).to_big_i, "-#{"0" * 199}1", precision: 200 + + it_converts_to_s 85.to_big_i, "85", precision: 0 + it_converts_to_s 85.to_big_i, "85", precision: 1 + it_converts_to_s 85.to_big_i, "85", precision: 2 + it_converts_to_s 85.to_big_i, "085", precision: 3 + it_converts_to_s 85.to_big_i, "0085", precision: 4 + it_converts_to_s 85.to_big_i, "00085", precision: 5 + it_converts_to_s 85.to_big_i, "#{"0" * 198}85", precision: 200 + + it_converts_to_s (-85).to_big_i, "-85", precision: 0 + it_converts_to_s (-85).to_big_i, 
"-85", precision: 1 + it_converts_to_s (-85).to_big_i, "-85", precision: 2 + it_converts_to_s (-85).to_big_i, "-085", precision: 3 + it_converts_to_s (-85).to_big_i, "-0085", precision: 4 + it_converts_to_s (-85).to_big_i, "-00085", precision: 5 + it_converts_to_s (-85).to_big_i, "-#{"0" * 198}85", precision: 200 + + it_converts_to_s 123.to_big_i, "123", precision: 0 + it_converts_to_s 123.to_big_i, "123", precision: 1 + it_converts_to_s 123.to_big_i, "123", precision: 2 + it_converts_to_s 123.to_big_i, "00123", precision: 5 + it_converts_to_s 123.to_big_i, "#{"0" * 197}123", precision: 200 + + a = 2.to_big_i ** 1024 - 1 + it_converts_to_s a, "#{"1" * 1024}", base: 2, precision: 1023 + it_converts_to_s a, "#{"1" * 1024}", base: 2, precision: 1024 + it_converts_to_s a, "0#{"1" * 1024}", base: 2, precision: 1025 + it_converts_to_s a, "#{"0" * 976}#{"1" * 1024}", base: 2, precision: 2000 + + it_converts_to_s (-a), "-#{"1" * 1024}", base: 2, precision: 1023 + it_converts_to_s (-a), "-#{"1" * 1024}", base: 2, precision: 1024 + it_converts_to_s (-a), "-0#{"1" * 1024}", base: 2, precision: 1025 + it_converts_to_s (-a), "-#{"0" * 976}#{"1" * 1024}", base: 2, precision: 2000 + end end it "does to_big_f" do diff --git a/spec/std/bit_array_spec.cr b/spec/std/bit_array_spec.cr index 82760fbe87b6..055e02f092c6 100644 --- a/spec/std/bit_array_spec.cr +++ b/spec/std/bit_array_spec.cr @@ -8,6 +8,13 @@ private def from_int(size : Int32, int : Int) ba end +private def assert_no_unused_bits(ba : BitArray, *, file = __FILE__, line = __LINE__) + bit_count = 32 * ((ba.size - 1) // 32 + 1) + (ba.size...bit_count).each do |index| + ba.unsafe_fetch(index).should be_false, file: file, line: line + end +end + describe "BitArray" do it "has size" do ary = BitArray.new(100) @@ -241,17 +248,112 @@ describe "BitArray" do ba[28..40].should eq(from_int(13, 0b11111_11111111)) end + + it "does not cause overflow (#8494)" do + ba = BitArray.new(64, true) + ba[0] = false + ba[33] = false + ba[0, 
32].should eq(from_int(32, 0b01111111_11111111_11111111_11111111_u32)) + ba[1, 32].should eq(from_int(32, 0b11111111_11111111_11111111_11111111_u32)) + ba[2, 32].should eq(from_int(32, 0b11111111_11111111_11111111_11111110_u32)) + end + + it "zeroes unused bits" do + ba = BitArray.new(32, true) + assert_no_unused_bits ba[0, 26] + assert_no_unused_bits ba[7, 11] + + ba = BitArray.new(64, true) + assert_no_unused_bits ba[0, 26] + assert_no_unused_bits ba[0, 33] + assert_no_unused_bits ba[7, 53] + + ba = BitArray.new(100, true) + assert_no_unused_bits ba[60, 26] + assert_no_unused_bits ba[0, 97] + + ba = BitArray.new(38, true) + ba[0, 34].should eq(BitArray.new(34, true)) + end end - it "toggles a bit" do - ary = BitArray.new(32) - ary[3].should be_false + describe "#toggle" do + it "toggles a bit" do + ary = BitArray.new(32) + ary[3].should be_false + + ary.toggle(3) + ary[3].should be_true + + ary.toggle(3) + ary[3].should be_false + end + + it "toggles with index and count" do + ary = from_int(4, 0b0011) + ary.toggle(1, 2) + ary.should eq(from_int(4, 0b0101)) + + ary = from_int(40, 0b00110011_01010101) + ary.toggle(30, 6) + ary[24..].should eq(from_int(16, 0b00110000_10100101)) + + ary = from_int(32, 0b10000000_00000000_00000000_00000001) + ary.toggle(0, 32) + ary.should eq(from_int(32, 0b01111111_11111111_11111111_11111110)) + end + + it "toggles with index and count, not enough bits" do + ary = from_int(4, 0b0011) + ary.toggle(1, 5) + ary.should eq(from_int(4, 0b0100)) + (4..31).each { |i| ary.unsafe_fetch(i).should be_false } + + ary = from_int(40, 0b00110011_01010101) + ary.toggle(30, 12) + ary[24..].should eq(from_int(16, 0b00110000_10101010)) + (40..63).each { |i| ary.unsafe_fetch(i).should be_false } + end + + it "toggles with index == size and count" do + ary = from_int(4, 0b0011) + ary.toggle(4, 2) + ary.should eq(from_int(4, 0b0011)) + (4..31).each { |i| ary.unsafe_fetch(i).should be_false } + + ary = from_int(40, 0b00110011_01010101) + ary.toggle(40, 6) 
+ ary[24..].should eq(from_int(16, 0b00110011_01010101)) + (40..63).each { |i| ary.unsafe_fetch(i).should be_false } + end + + it "toggles with index < 0 and count" do + ary = from_int(4, 0b0011) + ary.toggle(-3, 2) + ary.should eq(from_int(4, 0b0101)) - ary.toggle(3) - ary[3].should be_true + ary = from_int(40, 0b00110011_01010101) + ary.toggle(-10, 6) + ary[24..].should eq(from_int(16, 0b00110000_10100101)) + end + + it "raises on out of bound index" do + expect_raises(IndexError) { BitArray.new(2).toggle(2) } + expect_raises(IndexError) { BitArray.new(2).toggle(-3) } + + expect_raises(IndexError) { BitArray.new(2).toggle(3, 1) } + expect_raises(IndexError) { BitArray.new(2).toggle(-3, 1) } + end - ary.toggle(3) - ary[3].should be_false + it "raises on negative count" do + expect_raises(ArgumentError) { BitArray.new(2).toggle(0, -1) } + end + + it "toggles with range" do + ary = from_int(40, 0b00110011_01010101) + ary.toggle(30..35) + ary[24..].should eq(from_int(16, 0b00110000_10100101)) + end end it "inverts all bits" do @@ -260,7 +362,7 @@ describe "BitArray" do ary.invert ary.all?.should be_true - (100..127).each { |i| ary.unsafe_fetch(i).should be_false } + assert_no_unused_bits ary ary[50] = false ary[33] = false @@ -293,7 +395,7 @@ describe "BitArray" do it "initializes with unused bits cleared" do ary = BitArray.new(3, true) - (0...32).each { |i| ary.unsafe_fetch(i).should eq(i < ary.size) } + assert_no_unused_bits ary end it "reads bits from slice" do diff --git a/spec/std/enumerable_spec.cr b/spec/std/enumerable_spec.cr index c126d532c7f4..b5f7eb6344f5 100644 --- a/spec/std/enumerable_spec.cr +++ b/spec/std/enumerable_spec.cr @@ -615,6 +615,20 @@ describe "Enumerable" do result = ([] of Int32).reduce(10) { |memo, i| memo + i } result.should eq 10 end + + it "allows block return type to be different from element type" do + [1, 2, 3].reduce { |x, y| "#{x}-#{y}" }.should eq("1-2-3") + [1].reduce { |x, y| "#{x}-#{y}" }.should eq(1) + {1}.reduce { |x, y| 
"#{x}-#{y}" }.should eq(1) + + expect_raises Enumerable::EmptyError do + ([] of Int32).reduce { |x, y| "#{x}-#{y}" } + end + + expect_raises Enumerable::EmptyError do + Tuple.new.reduce { |x, y| "#{x}-#{y}" } + end + end end describe "reduce?" do @@ -623,6 +637,14 @@ describe "Enumerable" do it "returns nil if empty" do ([] of Int32).reduce? { |memo, i| memo + i }.should be_nil end + + it "allows block return type to be different from element type" do + [1, 2, 3].reduce? { |x, y| "#{x}-#{y}" }.should eq("1-2-3") + [1].reduce? { |x, y| "#{x}-#{y}" }.should eq(1) + {1}.reduce? { |x, y| "#{x}-#{y}" }.should eq(1) + ([] of Int32).reduce? { |x, y| "#{x}-#{y}" }.should be_nil + Tuple.new.reduce? { |x, y| "#{x}-#{y}" }.should be_nil + end end describe "#accumulate" do @@ -932,6 +954,12 @@ describe "Enumerable" do ints.should eq([1, 3]) ints.should be_a(Array(Int32)) end + + it "with type, for tuples" do + ints = {1, true, false, 3}.reject(Int32) + ints.should eq([true, false]) + ints.should be_a(Array(Bool)) + end end describe "sample" do @@ -1168,6 +1196,10 @@ describe "Enumerable" do hash = Tuple.new({:a, 1}, {:c, 2}).to_h hash.should be_a(Hash(Symbol, Int32)) hash.should eq({:a => 1, :c => 2}) + + hash = Tuple.new({1, 1.0}, {'a', "aaa"}).to_h + hash.should be_a(Hash(Int32 | Char, Float64 | String)) + hash.should eq({1 => 1.0, 'a' => "aaa"}) end it "for array" do diff --git a/spec/std/file_spec.cr b/spec/std/file_spec.cr index fdf071bf0cb0..3c633dfab601 100644 --- a/spec/std/file_spec.cr +++ b/spec/std/file_spec.cr @@ -1315,6 +1315,7 @@ describe "File" do describe ".match?" 
do it "matches basics" do + File.match?("abc", Path["abc"]).should be_true File.match?("abc", "abc").should be_true File.match?("*", "abc").should be_true File.match?("*c", "abc").should be_true diff --git a/spec/std/file_utils_spec.cr b/spec/std/file_utils_spec.cr index bb0c02a50ed2..aeda85a99ced 100644 --- a/spec/std/file_utils_spec.cr +++ b/spec/std/file_utils_spec.cr @@ -1,176 +1,241 @@ require "./spec_helper" require "file_utils" +private def test_with_string_and_path(*paths, &) + yield *paths + yield *paths.map { |path| Path[path] } +end + describe "FileUtils" do - describe "cd" do + describe ".cd" do it "should work" do cwd = Dir.current - FileUtils.cd("..") - Dir.current.should_not eq(cwd) - FileUtils.cd(cwd) - Dir.current.should eq(cwd) + test_with_string_and_path(cwd) do |arg| + FileUtils.cd("..") + Dir.current.should_not eq(cwd) + FileUtils.cd(arg) + Dir.current.should eq(cwd) + end end it "raises" do - expect_raises(File::NotFoundError, "Error while changing directory: '/nope'") do - FileUtils.cd("/nope") + test_with_string_and_path "/nope" do |arg| + expect_raises(File::NotFoundError, "Error while changing directory: '/nope'") do + FileUtils.cd(arg) + end end end it "accepts a block" do cwd = Dir.current + test_with_string_and_path("..") do |arg| + FileUtils.cd(arg) do + Dir.current.should_not eq(cwd) + end - FileUtils.cd("..") do - Dir.current.should_not eq(cwd) + Dir.current.should eq(cwd) end - - Dir.current.should eq(cwd) end end - describe "pwd" do + describe ".pwd" do it "returns the current working directory" do FileUtils.pwd.should eq(Dir.current) end end - describe "cmp" do + describe ".cmp" do it "compares two equal files" do - FileUtils.cmp( - datapath("test_file.txt"), - datapath("test_file.txt") - ).should be_true + test_with_string_and_path(datapath("test_file.txt")) do |arg| + FileUtils.cmp(arg, arg).should be_true + end end it "compares two different files" do - FileUtils.cmp( - datapath("test_file.txt"), - datapath("test_file.ini") - 
).should be_false + test_with_string_and_path(datapath("test_file.txt"), datapath("test_file.ini")) do |*args| + FileUtils.cmp(*args).should be_false + end end end - describe "touch" do + describe ".touch" do it "creates file if it doesn't exist" do with_tempfile("touch.txt") do |path| - File.exists?(path).should be_false - FileUtils.touch(path) - File.exists?(path).should be_true + test_with_string_and_path(path) do |arg| + File.exists?(path).should be_false + FileUtils.touch(arg) + File.exists?(path).should be_true + + FileUtils.rm_rf path + end end end it "creates multiple files if they don't exists" do with_tempfile("touch1", "touch2", "touch3") do |path1, path2, path3| - paths = [path1, path2, path3] - paths.each { |path| File.exists?(path).should be_false } - FileUtils.touch(paths) - paths.each { |path| File.exists?(path).should be_true } + paths = {path1, path2, path3} + test_with_string_and_path(*paths) do |*args| + paths.each { |path| File.exists?(path).should be_false } + FileUtils.touch(args.to_a) + paths.each { |path| File.exists?(path).should be_true } + + FileUtils.rm_rf paths + end end end end - describe "cp" do + describe ".cp" do it "copies a file" do src_path = datapath("test_file.txt") with_tempfile("cp.txt") do |out_path| - FileUtils.cp(src_path, out_path) - File.exists?(out_path).should be_true - FileUtils.cmp(src_path, out_path).should be_true + test_with_string_and_path(src_path, out_path) do |*args| + File.exists?(out_path).should be_false + FileUtils.cp(*args) + File.exists?(out_path).should be_true + FileUtils.cmp(src_path, out_path).should be_true + + FileUtils.rm_rf(out_path) + end end end pending_win32 "copies permissions" do with_tempfile("cp-permissions-src.txt", "cp-permissions-out.txt") do |src_path, out_path| - File.write(src_path, "foo") - File.chmod(src_path, 0o700) + test_with_string_and_path(src_path, out_path) do |*args| + File.write(src_path, "foo") + File.chmod(src_path, 0o700) - FileUtils.cp(src_path, out_path) + 
FileUtils.cp(*args) - File.info(out_path).permissions.should eq(File::Permissions.new(0o700)) - FileUtils.cmp(src_path, out_path).should be_true + File.info(out_path).permissions.should eq(File::Permissions.new(0o700)) + FileUtils.cmp(src_path, out_path).should be_true + + FileUtils.rm_rf(out_path) + end end end it "raises an error if the directory doesn't exist" do expect_raises(ArgumentError, "No such directory : not_existing_dir") do - FileUtils.cp({datapath("test_file.txt")}, "not_existing_dir") + test_with_string_and_path(datapath("test_file.txt"), "not_existing_dir") do |src_path, dest_path| + FileUtils.cp({src_path}, dest_path) + end end end it "copies multiple files" do - src_name1 = "test_file.txt" - src_name2 = "test_file.ini" + name1 = "test_file.txt" + name2 = "test_file.ini" src_path = datapath + src_name1 = File.join(src_path, name1) + src_name2 = File.join(src_path, name2) with_tempfile("cp-multiple") do |out_path| - Dir.mkdir_p(out_path) - FileUtils.cp({File.join(src_path, src_name1), File.join(src_path, src_name2)}, out_path) - File.exists?(File.join(out_path, src_name1)).should be_true - File.exists?(File.join(out_path, src_name2)).should be_true - FileUtils.cmp(File.join(src_path, src_name1), File.join(out_path, src_name1)).should be_true - FileUtils.cmp(File.join(src_path, src_name2), File.join(out_path, src_name2)).should be_true + out_name1 = File.join(out_path, name1) + out_name2 = File.join(out_path, name2) + test_with_string_and_path(src_name1, src_name2, out_path) do |arg1, arg2, dest_arg| + Dir.mkdir_p(out_path) + + File.exists?(out_name1).should be_false + File.exists?(out_name2).should be_false + + FileUtils.cp({arg1, arg2}, dest_arg) + + File.exists?(out_name1).should be_true + File.exists?(out_name2).should be_true + FileUtils.cmp(src_name1, out_name1).should be_true + FileUtils.cmp(src_name2, out_name2).should be_true + + FileUtils.rm_rf(out_path) + end end end end - describe "cp_r" do + describe ".cp_r" do it "copies a directory 
recursively" do with_tempfile("cp_r-test", "cp_r-test-copied") do |src_path, dest_path| - Dir.mkdir_p(src_path) - File.write(File.join(src_path, "a"), "") - Dir.mkdir(File.join(src_path, "b")) - File.write(File.join(src_path, "b/c"), "") - - FileUtils.cp_r(src_path, dest_path) - File.exists?(File.join(dest_path, "a")).should be_true - File.exists?(File.join(dest_path, "b/c")).should be_true + test_with_string_and_path(src_path, dest_path) do |*args| + File.exists?(File.join(dest_path, "a")).should be_false + File.exists?(File.join(dest_path, "b/c")).should be_false + Dir.mkdir_p(src_path) + File.write(File.join(src_path, "a"), "") + Dir.mkdir(File.join(src_path, "b")) + File.write(File.join(src_path, "b/c"), "") + + FileUtils.cp_r(*args) + File.exists?(File.join(dest_path, "a")).should be_true + File.exists?(File.join(dest_path, "b/c")).should be_true + + FileUtils.rm_rf(src_path) + FileUtils.rm_rf(dest_path) + end end end it "copies a directory recursively if destination exists and is empty" do with_tempfile("cp_r-test", "cp_r-test-copied") do |src_path, dest_path| - Dir.mkdir_p(dest_path) - - Dir.mkdir_p(src_path) - File.write(File.join(src_path, "a"), "") - Dir.mkdir(File.join(src_path, "b")) - File.write(File.join(src_path, "b/c"), "") - - FileUtils.cp_r(src_path, dest_path) - File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_true - File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_true + test_with_string_and_path(src_path, dest_path) do |*args| + Dir.mkdir_p(dest_path) + + Dir.mkdir_p(src_path) + File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_false + File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_false + File.write(File.join(src_path, "a"), "") + Dir.mkdir(File.join(src_path, "b")) + File.write(File.join(src_path, "b/c"), "") + + FileUtils.cp_r(*args) + File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_true + File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_true + + 
FileUtils.rm_rf(src_path) + FileUtils.rm_rf(dest_path) + end end end it "copies a directory recursively if destination exists leaving existing files" do with_tempfile("cp_r-test", "cp_r-test-copied") do |src_path, dest_path| - Dir.mkdir_p(dest_path) - File.write(File.join(dest_path, "d"), "") - Dir.mkdir(File.join(dest_path, "cp_r-test")) - Dir.mkdir(File.join(dest_path, "cp_r-test", "b")) - - Dir.mkdir_p(src_path) - File.write(File.join(src_path, "a"), "") - Dir.mkdir(File.join(src_path, "b")) - File.write(File.join(src_path, "b/c"), "") - - FileUtils.cp_r(src_path, dest_path) - File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_true - File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_true - File.exists?(File.join(dest_path, "d")).should be_true + test_with_string_and_path(src_path, dest_path) do |*args| + Dir.mkdir_p(dest_path) + File.write(File.join(dest_path, "d"), "") + Dir.mkdir(File.join(dest_path, "cp_r-test")) + Dir.mkdir(File.join(dest_path, "cp_r-test", "b")) + + File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_false + File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_false + File.exists?(File.join(dest_path, "d")).should be_true + + Dir.mkdir_p(src_path) + File.write(File.join(src_path, "a"), "") + Dir.mkdir(File.join(src_path, "b")) + File.write(File.join(src_path, "b/c"), "") + + FileUtils.cp_r(*args) + File.exists?(File.join(dest_path, "cp_r-test", "a")).should be_true + File.exists?(File.join(dest_path, "cp_r-test", "b/c")).should be_true + File.exists?(File.join(dest_path, "d")).should be_true + + FileUtils.rm_rf(src_path) + FileUtils.rm_rf(dest_path) + end end end end - describe "rm_r" do + describe ".rm_r" do it "deletes a directory recursively" do with_tempfile("rm_r") do |path| - Dir.mkdir(path) - File.write(File.join(path, "a"), "") - Dir.mkdir(File.join(path, "b")) - File.write(File.join(path, "b/c"), "") - - FileUtils.rm_r(path) - Dir.exists?(path).should be_false + 
test_with_string_and_path(path) do |arg| + Dir.mkdir(path) + File.write(File.join(path, "a"), "") + Dir.mkdir(File.join(path, "b")) + File.write(File.join(path, "b/c"), "") + + FileUtils.rm_r(arg) + Dir.exists?(path).should be_false + end end end @@ -179,99 +244,126 @@ describe "FileUtils" do link_path = File.join(removed_path, "link") file_path = File.join(linked_path, "file") - Dir.mkdir(removed_path) - Dir.mkdir(linked_path) - File.symlink(linked_path, link_path) - File.write(file_path, "") + test_with_string_and_path(removed_path) do |arg| + Dir.mkdir(removed_path) + Dir.mkdir(linked_path) + File.symlink(linked_path, link_path) + File.write(file_path, "") + + Dir.exists?(removed_path).should be_true + Dir.exists?(linked_path).should be_true + File.exists?(file_path).should be_true + + FileUtils.rm_r(arg) + Dir.exists?(removed_path).should be_false + Dir.exists?(linked_path).should be_true + File.exists?(file_path).should be_true - FileUtils.rm_r(removed_path) - Dir.exists?(removed_path).should be_false - Dir.exists?(linked_path).should be_true - File.exists?(file_path).should be_true + FileUtils.rm_rf(linked_path) + end end end end - describe "rm_rf" do + describe ".rm_rf" do it "delete recursively a directory" do with_tempfile("rm_rf") do |path| - FileUtils.mkdir(path) - File.write(File.join(path, "a"), "") - FileUtils.mkdir(File.join(path, "b")) - FileUtils.rm_rf(path).should be_nil - Dir.exists?(path).should be_false + test_with_string_and_path(path) do |arg| + FileUtils.mkdir(path) + File.write(File.join(path, "a"), "") + FileUtils.mkdir(File.join(path, "b")) + FileUtils.rm_rf(arg).should be_nil + Dir.exists?(path).should be_false + end end end it "delete recursively multiple directory" do with_tempfile("rm_rf-multi1", "rm_rf-multi2") do |path1, path2| - FileUtils.mkdir(path1) - FileUtils.mkdir(path2) - File.write(File.join(path1, "a"), "") - File.write(File.join(path2, "a"), "") - FileUtils.mkdir(File.join(path1, "b")) - FileUtils.mkdir(File.join(path2, 
"b")) - FileUtils.rm_rf([path1, path2]).should be_nil - Dir.exists?(path1).should be_false - Dir.exists?(path2).should be_false + test_with_string_and_path(path1, path2) do |*args| + FileUtils.mkdir(path1) + FileUtils.mkdir(path2) + File.write(File.join(path1, "a"), "") + File.write(File.join(path2, "a"), "") + FileUtils.mkdir(File.join(path1, "b")) + FileUtils.mkdir(File.join(path2, "b")) + FileUtils.rm_rf(args.to_a).should be_nil + Dir.exists?(path1).should be_false + Dir.exists?(path2).should be_false + end end end it "doesn't return error on non existing file" do with_tempfile("rm_rf-nonexistent") do |path| - FileUtils.rm_rf(path).should be_nil + test_with_string_and_path(path) do |arg| + FileUtils.rm_rf(arg).should be_nil + end end end it "doesn't return error on non existing files" do with_tempfile("rm_rf-nonexistent") do |path1| path2 = File.join(path1, "a") - FileUtils.mkdir(path1) - FileUtils.rm_rf([path1, path2]).should be_nil + test_with_string_and_path(path1, path2) do |*args| + FileUtils.mkdir(path1) + FileUtils.rm_rf(args.to_a).should be_nil + end end end end - describe "mv" do + describe ".mv" do it "moves a file from one place to another" do with_tempfile("mv1", "mv2") do |path1, path2| - FileUtils.mkdir([path1, path2]) - path1 = File.join(path1, "a") - path2 = File.join(path2, "b") - File.write(path1, "") - FileUtils.mv(path1, path2).should be_nil - File.exists?(path1).should be_false - File.exists?(path2).should be_true + a = File.join(path1, "a") + b = File.join(path2, "b") + test_with_string_and_path(a, b) do |*args| + FileUtils.mkdir([path1, path2]) + File.write(a, "") + FileUtils.mv(*args).should be_nil + File.exists?(a).should be_false + File.exists?(b).should be_true + FileUtils.rm_rf(path1) + FileUtils.rm_rf(path2) + end end end it "raises an error if non correct arguments" do with_tempfile("mv-nonexistent") do |path| - expect_raises(File::NotFoundError, "Error renaming file: '#{File.join(path, "a").inspect_unquoted}' -> '#{File.join(path, 
"b").inspect_unquoted}'") do - FileUtils.mv(File.join(path, "a"), File.join(path, "b")) + test_with_string_and_path(File.join(path, "a"), File.join(path, "b")) do |*args| + expect_raises(File::NotFoundError, "Error renaming file: '#{File.join(path, "a").inspect_unquoted}' -> '#{File.join(path, "b").inspect_unquoted}'") do + FileUtils.mv(*args) + end end end end it "moves multiple files to one place" do with_tempfile("mv-multi1", "mv-multi2", "mv-multi3") do |path1, path2, path3| - FileUtils.mkdir([path1, path2, path3]) - path1 = File.join(path1, "a") - path2 = File.join(path2, "b") - File.write(path1, "") - File.write(path2, "") - FileUtils.mv([path1, path2], path3).should be_nil - File.exists?(path1).should be_false - File.exists?(path2).should be_false - File.exists?(File.join(path3, "a")).should be_true - File.exists?(File.join(path3, "b")).should be_true + source1 = File.join(path1, "a") + source2 = File.join(path2, "b") + test_with_string_and_path(source1, source2, path3) do |arg1, arg2, arg3| + FileUtils.mkdir([path1, path2, path3]) + File.write(source1, "") + File.write(source2, "") + FileUtils.mv([arg1, arg2], arg3).should be_nil + File.exists?(source1).should be_false + File.exists?(source2).should be_false + File.exists?(File.join(path3, "a")).should be_true + File.exists?(File.join(path3, "b")).should be_true + FileUtils.rm_rf([path1, path2, path3]) + end end end it "raises an error if dest is non correct" do expect_raises ArgumentError do with_tempfile("mv-nonexistent") do |path| - FileUtils.mv([File.join(path, "a"), File.join(path, "b")], File.join(path, "c")) + test_with_string_and_path(File.join(path, "a"), File.join(path, "b"), File.join(path, "c")) do |arg1, arg2, arg3| + FileUtils.mv([arg1, arg2], arg3) + end end end end @@ -282,152 +374,191 @@ describe "FileUtils" do path2 = File.join(source_path, "b") path3 = File.join(source_path, "c", "sub") - FileUtils.mkdir_p([path1, path2, target_path]) - path1 = File.join(path1, "a") - path2 = 
File.join(path2, "b") - File.write(path1, "") - File.write(path2, "") - FileUtils.mv([path1, path2, path3], target_path).should be_nil - File.exists?(path1).should be_false - File.exists?(path2).should be_false - File.exists?(File.join(target_path, "a")).should be_true - File.exists?(File.join(target_path, "b")).should be_true + test_with_string_and_path(path1, path2, path3, target_path) do |arg1, arg2, arg3, arg4| + FileUtils.mkdir_p([path1, path2, target_path]) + path1 = File.join(path1, "a") + path2 = File.join(path2, "b") + File.write(path1, "") + File.write(path2, "") + FileUtils.mv([arg1, arg2, arg3], arg4).should be_nil + File.exists?(path1).should be_false + File.exists?(path2).should be_false + File.exists?(File.join(target_path, "a")).should be_true + File.exists?(File.join(target_path, "b")).should be_true + FileUtils.rm_rf([path1, path2, target_path]) + end end end end it "tests mkdir and rmdir with a new path" do with_tempfile("mkdir-new") do |path| - FileUtils.mkdir(path, 0o700).should be_nil - Dir.exists?(path).should be_true - FileUtils.rmdir(path).should be_nil - Dir.exists?(path).should be_false + test_with_string_and_path(path) do |arg| + FileUtils.mkdir(arg, 0o700).should be_nil + Dir.exists?(path).should be_true + FileUtils.rmdir(arg).should be_nil + Dir.exists?(path).should be_false + end end end it "tests mkdir and rmdir with multiple new paths" do with_tempfile("mkdir-new1", "mkdir-new2") do |path1, path2| - FileUtils.mkdir([path1, path2], 0o700).should be_nil - Dir.exists?(path1).should be_true - Dir.exists?(path2).should be_true - FileUtils.rmdir([path1, path2]).should be_nil - Dir.exists?(path1).should be_false - Dir.exists?(path2).should be_false + test_with_string_and_path(path1, path2) do |*args| + FileUtils.mkdir(args.to_a, 0o700).should be_nil + Dir.exists?(path1).should be_true + Dir.exists?(path2).should be_true + FileUtils.rmdir(args.to_a).should be_nil + Dir.exists?(path1).should be_false + Dir.exists?(path2).should be_false + 
FileUtils.rm_rf([path1, path2]) + end end end it "tests mkdir with an existing path" do - expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do - Dir.mkdir(datapath, 0o700) + test_with_string_and_path(datapath) do |arg| + expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do + Dir.mkdir(arg, 0o700) + end end end it "tests mkdir with multiples existing paths" do - expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do - FileUtils.mkdir([datapath, datapath], 0o700) + test_with_string_and_path(datapath) do |arg| + expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do + FileUtils.mkdir([arg, arg], 0o700) + end end with_tempfile("mkdir-nonexistent") do |path| - expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do - FileUtils.mkdir([path, datapath], 0o700) + test_with_string_and_path(path, datapath) do |*args| + expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath.inspect_unquoted}'") do + FileUtils.mkdir(args.to_a, 0o700) + end + FileUtils.rm_rf(path) end end end it "tests mkdir_p with multiples new path" do with_tempfile("mkdir_p-multi1", "mkdir_p-multi2") do |path1, path2| - FileUtils.mkdir_p([path1, path2]).should be_nil - Dir.exists?(path1).should be_true - Dir.exists?(path2).should be_true path3 = File.join({path1, "a", "b", "c"}) path4 = File.join({path2, "a", "b", "c"}) - FileUtils.mkdir_p([path3, path4]).should be_nil - Dir.exists?(path3).should be_true - Dir.exists?(path4).should be_true + test_with_string_and_path(path3, path4) do |*args| + FileUtils.mkdir_p([path1, path2]).should be_nil + Dir.exists?(path1).should be_true + Dir.exists?(path2).should be_true + FileUtils.mkdir_p(args.to_a).should be_nil + Dir.exists?(path3).should be_true + Dir.exists?(path4).should be_true + 
FileUtils.rm_rf([path1, path2]) + end end end it "tests mkdir_p with multiple existing path" do FileUtils.mkdir_p([datapath, datapath]).should be_nil with_tempfile("mkdir_p-existing") do |path| - expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath("test_file.txt").inspect_unquoted}'") do - FileUtils.mkdir_p([datapath("test_file.txt"), path]) + test_with_string_and_path(datapath("test_file.txt"), path) do |*args| + expect_raises(File::AlreadyExistsError, "Unable to create directory: '#{datapath("test_file.txt").inspect_unquoted}'") do + FileUtils.mkdir_p(args.to_a) + end end end end it "tests rmdir with an non existing path" do with_tempfile("rmdir-nonexistent") do |path| - expect_raises(File::NotFoundError, "Unable to remove directory: '#{path.inspect_unquoted}'") do - FileUtils.rmdir(path) + test_with_string_and_path(path) do |arg| + expect_raises(File::NotFoundError, "Unable to remove directory: '#{path.inspect_unquoted}'") do + FileUtils.rmdir(arg) + end end end end it "tests rmdir with multiple non existing path" do with_tempfile("rmdir-nonexistent") do |path| - expect_raises(File::NotFoundError, "Unable to remove directory: '#{path.inspect_unquoted}1'") do - FileUtils.rmdir(["#{path}1", "#{path}2"]) + test_with_string_and_path("#{path}1", "#{path}2") do |*args| + expect_raises(File::NotFoundError, "Unable to remove directory: '#{path.inspect_unquoted}1'") do + FileUtils.rmdir(args.to_a) + end end end end it "tests rmdir with a path that cannot be removed" do - expect_raises(File::Error, "Unable to remove directory: '#{datapath.inspect_unquoted}'") do - FileUtils.rmdir(datapath) + test_with_string_and_path(datapath) do |arg| + expect_raises(File::Error, "Unable to remove directory: '#{datapath.inspect_unquoted}'") do + FileUtils.rmdir(arg) + end end end it "tests rmdir with multiple path that cannot be removed" do - expect_raises(File::Error, "Unable to remove directory: '#{datapath.inspect_unquoted}'") do - 
FileUtils.rmdir([datapath, datapath]) + test_with_string_and_path(datapath) do |arg| + expect_raises(File::Error, "Unable to remove directory: '#{datapath.inspect_unquoted}'") do + FileUtils.rmdir([arg, arg]) + end end end it "tests rm with an existing path" do with_tempfile("rm") do |path| - File.write(path, "") - FileUtils.rm(path).should be_nil - File.exists?(path).should be_false + test_with_string_and_path(path) do |arg| + File.write(path, "") + FileUtils.rm(arg).should be_nil + File.exists?(path).should be_false + end end end it "tests rm with non existing path" do with_tempfile("rm-nonexistent") do |path| - expect_raises(File::NotFoundError, "Error deleting file: '#{path.inspect_unquoted}'") do - FileUtils.rm(path) + test_with_string_and_path(path) do |arg| + expect_raises(File::NotFoundError, "Error deleting file: '#{path.inspect_unquoted}'") do + FileUtils.rm(arg) + end end end end it "tests rm with multiple existing paths" do with_tempfile("rm-multi1", "rm-multi2") do |path1, path2| - File.write(path1, "") - File.write(path2, "") - FileUtils.rm([path1, path2]).should be_nil - File.exists?(path1).should be_false - File.exists?(path2).should be_false + test_with_string_and_path(path1, path2) do |*args| + File.write(path1, "") + File.write(path2, "") + FileUtils.rm(args.to_a).should be_nil + File.exists?(path1).should be_false + File.exists?(path2).should be_false + end end end it "tests rm with some non existing paths" do with_tempfile("rm-nonexistent1", "rm-nonexistent2") do |path1, path2| - File.write(path1, "") - File.write(path2, "") + test_with_string_and_path(path1, path2) do |arg1, arg2| + File.write(path1, "") + File.write(path2, "") - expect_raises(File::NotFoundError, "Error deleting file: '#{path2.inspect_unquoted}'") do - FileUtils.rm([path1, path2, path2]) + expect_raises(File::NotFoundError, "Error deleting file: '#{path2.inspect_unquoted}'") do + FileUtils.rm([arg1, arg2, arg2]) + end end end end - describe "ln" do + describe ".ln" do it 
"creates a hardlink" do with_tempfile("ln_src", "ln_dst") do |path1, path2| - FileUtils.touch(path1) - FileUtils.ln(path1, path2) - File.exists?(path2).should be_true - File.symlink?(path2).should be_false + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch(path1) + FileUtils.ln(arg1, arg2) + File.exists?(path2).should be_true + File.symlink?(path2).should be_false + FileUtils.rm_rf([path1, path2]) + end end end @@ -435,11 +566,14 @@ describe "FileUtils" do with_tempfile("ln_src", "ln_dst_dir") do |path1, path2| path2 += File::SEPARATOR path3 = File.join(path2, File.basename(path1)) - FileUtils.touch(path1) - FileUtils.mkdir(path2) - FileUtils.ln(path1, path2) - File.exists?(path3).should be_true - File.symlink?(path3).should be_false + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch(path1) + FileUtils.mkdir(path2) + FileUtils.ln(arg1, arg2) + File.exists?(path3).should be_true + File.symlink?(path3).should be_false + FileUtils.rm_rf([path1, path2]) + end end end @@ -447,23 +581,27 @@ describe "FileUtils" do with_tempfile("ln_src_1", "ln_src_2", "ln_src_3", "ln_dst_dir") do |path1, path2, path3, dir_path| paths = [path1, path2, path3] dir_path += File::SEPARATOR - - paths.each { |path| FileUtils.touch(path) } - FileUtils.mkdir(dir_path) - FileUtils.ln(paths, dir_path) - - paths.each do |path| - link_path = File.join(dir_path, File.basename(path)) - File.exists?(link_path).should be_true - File.symlink?(link_path).should be_false + test_with_string_and_path(path1, path2, path3, dir_path) do |arg1, arg2, arg3, arg4| + paths.each { |path| FileUtils.touch(path) } + FileUtils.mkdir(dir_path) + FileUtils.ln([arg1, arg2, arg3], arg4) + + paths.each do |path| + link_path = File.join(dir_path, File.basename(path)) + File.exists?(link_path).should be_true + File.symlink?(link_path).should be_false + end + FileUtils.rm_rf(dir_path) end end end it "fails with a nonexistent source" do with_tempfile("ln_src_missing", 
"ln_dst_missing") do |path1, path2| - ex = expect_raises(File::NotFoundError, "Error creating link: '#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do - FileUtils.ln(path1, path2) + test_with_string_and_path(path1, path2) do |arg1, arg2| + expect_raises(File::NotFoundError, "Error creating link: '#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do + FileUtils.ln(arg1, arg2) + end end end end @@ -472,20 +610,25 @@ describe "FileUtils" do with_tempfile("ln_src", "ln_dst_exists") do |path1, path2| FileUtils.touch([path1, path2]) - expect_raises(File::AlreadyExistsError, "Error creating link: '#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do - FileUtils.ln(path1, path2) + test_with_string_and_path(path1, path2) do |arg1, arg2| + expect_raises(File::AlreadyExistsError, "Error creating link: '#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do + FileUtils.ln(arg1, arg2) + end end end end end - describe "ln_s" do + describe ".ln_s" do it "creates a symlink" do with_tempfile("ln_s_src", "ln_s_dst") do |path1, path2| - FileUtils.touch(path1) - FileUtils.ln_s(path1, path2) - File.exists?(path2).should be_true - File.symlink?(path2).should be_true + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch(path1) + FileUtils.ln_s(arg1, arg2) + File.exists?(path2).should be_true + File.symlink?(path2).should be_true + FileUtils.rm_rf([path1, path2]) + end end end @@ -493,63 +636,79 @@ describe "FileUtils" do with_tempfile("ln_s_src", "ln_s_dst_dir") do |path1, path2| path3 = File.join(path2, File.basename(path1)) - FileUtils.touch(path1) - FileUtils.mkdir(path2) - FileUtils.ln_s(path1, path2) - File.exists?(path3).should be_true - File.symlink?(path3).should be_true + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch(path1) + FileUtils.mkdir(path2) + FileUtils.ln_s(arg1, arg2) + File.exists?(path3).should be_true + File.symlink?(path3).should be_true + FileUtils.rm_rf([path1, path2]) + end 
end end it "creates multiple symlinks inside a destination dir" do with_tempfile("ln_s_src_1", "ln_s_src_2", "ln_s_src_3", "ln_s_dst_dir") do |path1, path2, path3, dir_path| - paths = [path1, path2, path3] dir_path += File::SEPARATOR - paths.each { |path| FileUtils.touch(path) } - FileUtils.mkdir(dir_path) - FileUtils.ln_s(paths, dir_path) - - paths.each do |path| - link_path = File.join(dir_path, File.basename(path)) - File.exists?(link_path).should be_true - File.symlink?(link_path).should be_true + test_with_string_and_path(path1, path2, path3, dir_path) do |arg1, arg2, arg3, dir_arg| + paths = [arg1, arg2, arg3] + paths.each { |path| FileUtils.touch(path) } + FileUtils.mkdir(dir_path) + FileUtils.ln_s(paths, dir_arg) + + paths.each do |path| + link_path = File.join(dir_path, File.basename(path)) + File.exists?(link_path).should be_true + File.symlink?(link_path).should be_true + end + FileUtils.rm_rf(paths) + FileUtils.rm_rf(dir_path) end end end pending_win32 "works with a nonexistent source" do with_tempfile("ln_s_src_missing", "ln_s_dst_missing") do |path1, path2| - FileUtils.ln_s(path1, path2) - File.exists?(path2).should be_false - File.symlink?(path2).should be_true - - expect_raises(File::NotFoundError, "Error resolving real path: '#{path2.inspect_unquoted}'") do - File.real_path(path2) + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.ln_s(arg1, arg2) + File.exists?(path2).should be_false + File.symlink?(path2).should be_true + + expect_raises(File::NotFoundError, "Error resolving real path: '#{path2.inspect_unquoted}'") do + File.real_path(path2) + end + FileUtils.rm_rf(path2) end end end - it "fails with an extant destination" do + it "fails with an existing destination" do with_tempfile("ln_s_src", "ln_s_dst_exists") do |path1, path2| - FileUtils.touch([path1, path2]) + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch([path1, path2]) - expect_raises(File::AlreadyExistsError, "Error creating symlink: 
'#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do - FileUtils.ln_s(path1, path2) + expect_raises(File::AlreadyExistsError, "Error creating symlink: '#{path1.inspect_unquoted}' -> '#{path2.inspect_unquoted}'") do + FileUtils.ln_s(arg1, arg2) + end + FileUtils.rm_rf([path1, path2]) end end end end - describe "ln_sf" do + describe ".ln_sf" do it "overwrites a destination file" do with_tempfile("ln_sf_src", "ln_sf_dst_exists") do |path1, path2| - FileUtils.touch([path1, path2]) - File.symlink?(path1).should be_false - File.symlink?(path2).should be_false - - FileUtils.ln_sf(path1, path2) - File.symlink?(path1).should be_false - File.symlink?(path2).should be_true + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch([path1, path2]) + File.symlink?(path1).should be_false + File.symlink?(path2).should be_false + + FileUtils.ln_sf(arg1, arg2) + File.symlink?(path1).should be_false + File.symlink?(path2).should be_true + FileUtils.rm_rf([path1, path2]) + end end end @@ -558,41 +717,52 @@ describe "FileUtils" do dir += File::SEPARATOR path1 = File.join(dir, File.basename(path2)) - FileUtils.mkdir(dir) - FileUtils.touch([path1, path2]) - File.symlink?(path1).should be_false - File.symlink?(path2).should be_false + test_with_string_and_path(dir, path2) do |dir_arg, arg2| + FileUtils.mkdir(dir) + FileUtils.touch([path1, path2]) + File.symlink?(path1).should be_false + File.symlink?(path2).should be_false - FileUtils.ln_sf(path2, dir) - File.symlink?(path1).should be_true - File.symlink?(path2).should be_false + FileUtils.ln_sf(arg2, dir_arg) + File.symlink?(path1).should be_true + File.symlink?(path2).should be_false + + FileUtils.rm_rf([dir, path1, path2]) + end end end it "creates multiple symlinks in a destination dir, with overwrites" do with_tempfile("ln_sf_src_dir", "ln_sf_dst_dir") do |src_dir, dir| - paths1 = Array.new(3) { |i| "exists_#{i}" } - paths2 = paths1.map { |p| File.join(src_dir, p) } - paths3 = paths1.map { |p| 
File.join(dir, p) } + test_with_string_and_path(src_dir, dir) do |src_arg, dir_arg| + paths1 = Array.new(3) { |i| "exists_#{i}" } + paths2 = paths1.map { |p| File.join(src_arg, p) } + paths3 = paths1.map { |p| File.join(dir, p) } + + FileUtils.mkdir(src_dir) + FileUtils.mkdir(dir) + FileUtils.touch(paths2 + paths3) + (paths2 + paths3).each { |p| File.symlink?(p).should be_false } - FileUtils.mkdir(src_dir) - FileUtils.mkdir(dir) - FileUtils.touch(paths2 + paths3) - (paths2 + paths3).each { |p| File.symlink?(p).should be_false } + FileUtils.ln_sf(paths2, dir_arg) + paths2.each { |p| File.symlink?(p).should be_false } + paths3.each { |p| File.symlink?(p).should be_true } - FileUtils.ln_sf(paths2, dir) - paths2.each { |p| File.symlink?(p).should be_false } - paths3.each { |p| File.symlink?(p).should be_true } + FileUtils.rm_rf([src_dir, dir]) + end end end it "creates a symlink even if there's nothing to overwrite" do with_tempfile("ln_sf_src", "ln_sf_dst") do |path1, path2| - FileUtils.touch(path1) - File.exists?(path2).should be_false + test_with_string_and_path(path1, path2) do |arg1, arg2| + FileUtils.touch(path1) + File.exists?(path2).should be_false - FileUtils.ln_sf(path1, path2) - File.symlink?(path2).should be_true + FileUtils.ln_sf(arg1, arg2) + File.symlink?(path2).should be_true + FileUtils.rm_rf([path1, path2]) + end end end end diff --git a/spec/std/float_spec.cr b/spec/std/float_spec.cr index ac4f68a9d16b..6e8b60f5e555 100644 --- a/spec/std/float_spec.cr +++ b/spec/std/float_spec.cr @@ -205,6 +205,50 @@ describe "Float" do end end + describe "#next_float" do + it "does for f64" do + 0.0.next_float.should eq(5.0e-324) # smallest denormal (not MIN_POSITIVE) + 1.0.next_float.should eq(1.0000000000000002) + (-1.0).next_float.should eq(-0.9999999999999999) + Float64::MAX.next_float.should eq(Float64::INFINITY) + Float64::INFINITY.next_float.should eq(Float64::INFINITY) + (-Float64::INFINITY).next_float.should eq(Float64::MIN) + 
Float64::NAN.next_float.nan?.should be_true + end + + it "does for f32" do + 0.0_f32.next_float.should eq(1.0e-45_f32) # smallest denormal (not MIN_POSITIVE) + 1.0_f32.next_float.should eq(1.0000001_f32) + (-1.0_f32).next_float.should eq(-0.99999994_f32) + Float32::MAX.next_float.should eq(Float32::INFINITY) + Float32::INFINITY.next_float.should eq(Float32::INFINITY) + (-Float32::INFINITY).next_float.should eq(Float32::MIN) + Float32::NAN.next_float.nan?.should be_true + end + end + + describe "#prev_float" do + it "does for f64" do + 0.0.prev_float.should eq(-5.0e-324) # smallest denormal (not MIN_POSITIVE) + 1.0.prev_float.should eq(0.9999999999999999) + (-1.0).prev_float.should eq(-1.0000000000000002) + Float64::MIN.prev_float.should eq(-Float64::INFINITY) + Float64::INFINITY.prev_float.should eq(Float64::MAX) + (-Float64::INFINITY).prev_float.should eq(-Float64::INFINITY) + Float64::NAN.prev_float.nan?.should be_true + end + + it "does for f32" do + 0.0_f32.prev_float.should eq(-1.0e-45_f32) # smallest denormal (not MIN_POSITIVE) + 1.0_f32.prev_float.should eq(0.99999994_f32) + (-1.0_f32).prev_float.should eq(-1.0000001_f32) + Float32::MIN.prev_float.should eq(-Float32::INFINITY) + Float32::INFINITY.prev_float.should eq(Float32::MAX) + (-Float32::INFINITY).prev_float.should eq(-Float32::INFINITY) + Float32::NAN.prev_float.nan?.should be_true + end + end + describe "#inspect" do it "does inspect for f64" do 3.2.inspect.should eq("3.2") diff --git a/spec/std/http/web_socket_spec.cr b/spec/std/http/web_socket_spec.cr index c6ec2c4271e0..8893dbc6f558 100644 --- a/spec/std/http/web_socket_spec.cr +++ b/spec/std/http/web_socket_spec.cr @@ -423,6 +423,26 @@ describe HTTP::WebSocket do end end + it "sends correct HTTP basic auth header" do + ws_handler = HTTP::WebSocketHandler.new do |ws, ctx| + ws.send ctx.request.headers["Authorization"] + ws.close + end + http_server = HTTP::Server.new([ws_handler]) + address = http_server.bind_unused_port + + 
run_server(http_server) do + client = HTTP::WebSocket.new("ws://test_username:test_password@#{address}") + message = nil + client.on_message do |msg| + message = msg + end + client.run + message.should eq( + "Basic #{Base64.strict_encode("test_username:test_password")}") + end + end + it "handshake fails if server does not switch protocols" do http_server = HTTP::Server.new do |context| context.response.status_code = 200 diff --git a/spec/std/int_spec.cr b/spec/std/int_spec.cr index a5d111ba5a6c..67ae40774793 100644 --- a/spec/std/int_spec.cr +++ b/spec/std/int_spec.cr @@ -4,12 +4,13 @@ require "./spec_helper" {% end %} require "spec/helpers/iterate" -private def to_s_with_io(num) - String.build { |io| num.to_s(io) } -end - -private def to_s_with_io(num, base, upcase = false) - String.build { |io| num.to_s(io, base, upcase: upcase) } +private macro it_converts_to_s(num, str, **opts) + it {{ "converts #{num} to #{str}" }} do + num = {{ num }} + str = {{ str }} + num.to_s({{ opts.double_splat }}).should eq(str) + String.build { |io| num.to_s(io, {{ opts.double_splat }}) }.should eq(str) + end end describe "Int" do @@ -166,79 +167,116 @@ describe "Int" do it "doesn't silently overflow" { 2_000_000.lcm(3_000_000).should eq(6_000_000) } end - describe "to_s in base" do - it { 12.to_s(2).should eq("1100") } - it { -12.to_s(2).should eq("-1100") } - it { -123456.to_s(2).should eq("-11110001001000000") } - it { 1234.to_s(16).should eq("4d2") } - it { -1234.to_s(16).should eq("-4d2") } - it { 1234.to_s(36).should eq("ya") } - it { -1234.to_s(36).should eq("-ya") } - it { 1234.to_s(16, upcase: true).should eq("4D2") } - it { -1234.to_s(16, upcase: true).should eq("-4D2") } - it { 1234.to_s(36, upcase: true).should eq("YA") } - it { -1234.to_s(36, upcase: true).should eq("-YA") } - it { 0.to_s(2).should eq("0") } - it { 0.to_s(16).should eq("0") } - it { 1.to_s(2).should eq("1") } - it { 1.to_s(16).should eq("1") } - it { 0.to_s(62).should eq("0") } - it { 1.to_s(62).should 
eq("1") } - it { 10.to_s(62).should eq("a") } - it { 35.to_s(62).should eq("z") } - it { 36.to_s(62).should eq("A") } - it { 61.to_s(62).should eq("Z") } - it { 62.to_s(62).should eq("10") } - it { 97.to_s(62).should eq("1z") } - it { 3843.to_s(62).should eq("ZZ") } - - it "raises on base 1" do - expect_raises(ArgumentError, "Invalid base 1") { 123.to_s(1) } - end - - it "raises on base 37" do - expect_raises(ArgumentError, "Invalid base 37") { 123.to_s(37) } - end - - it "raises on base 62 with upcase" do - expect_raises(ArgumentError, "upcase must be false for base 62") { 123.to_s(62, upcase: true) } - end - - it { to_s_with_io(12, 2).should eq("1100") } - it { to_s_with_io(-12, 2).should eq("-1100") } - it { to_s_with_io(-123456, 2).should eq("-11110001001000000") } - it { to_s_with_io(1234, 16).should eq("4d2") } - it { to_s_with_io(-1234, 16).should eq("-4d2") } - it { to_s_with_io(1234, 36).should eq("ya") } - it { to_s_with_io(-1234, 36).should eq("-ya") } - it { to_s_with_io(1234, 16, upcase: true).should eq("4D2") } - it { to_s_with_io(-1234, 16, upcase: true).should eq("-4D2") } - it { to_s_with_io(1234, 36, upcase: true).should eq("YA") } - it { to_s_with_io(-1234, 36, upcase: true).should eq("-YA") } - it { to_s_with_io(0, 2).should eq("0") } - it { to_s_with_io(0, 16).should eq("0") } - it { to_s_with_io(1, 2).should eq("1") } - it { to_s_with_io(1, 16).should eq("1") } - it { to_s_with_io(0, 62).should eq("0") } - it { to_s_with_io(1, 62).should eq("1") } - it { to_s_with_io(10, 62).should eq("a") } - it { to_s_with_io(35, 62).should eq("z") } - it { to_s_with_io(36, 62).should eq("A") } - it { to_s_with_io(61, 62).should eq("Z") } - it { to_s_with_io(62, 62).should eq("10") } - it { to_s_with_io(97, 62).should eq("1z") } - it { to_s_with_io(3843, 62).should eq("ZZ") } - - it "raises on base 1 with io" do - expect_raises(ArgumentError, "Invalid base 1") { to_s_with_io(123, 1) } - end - - it "raises on base 37 with io" do - expect_raises(ArgumentError, 
"Invalid base 37") { to_s_with_io(123, 37) } - end - - it "raises on base 62 with upcase with io" do - expect_raises(ArgumentError, "upcase must be false for base 62") { to_s_with_io(12, 62, upcase: true) } + describe "#to_s" do + it_converts_to_s 0, "0" + it_converts_to_s 1, "1" + + context "extrema for various int sizes" do + it_converts_to_s 127_i8, "127" + it_converts_to_s -128_i8, "-128" + + it_converts_to_s 32767_i16, "32767" + it_converts_to_s -32768_i16, "-32768" + + it_converts_to_s 2147483647, "2147483647" + it_converts_to_s -2147483648, "-2147483648" + + it_converts_to_s 9223372036854775807_i64, "9223372036854775807" + it_converts_to_s -9223372036854775808_i64, "-9223372036854775808" + + it_converts_to_s 255_u8, "255" + it_converts_to_s 65535_u16, "65535" + it_converts_to_s 4294967295_u32, "4294967295" + it_converts_to_s 18446744073709551615_u64, "18446744073709551615" + end + + context "base and upcase parameters" do + it_converts_to_s 12, "1100", base: 2 + it_converts_to_s -12, "-1100", base: 2 + it_converts_to_s -123456, "-11110001001000000", base: 2 + it_converts_to_s 1234, "4d2", base: 16 + it_converts_to_s -1234, "-4d2", base: 16 + it_converts_to_s 1234, "ya", base: 36 + it_converts_to_s -1234, "-ya", base: 36 + it_converts_to_s 1234, "4D2", base: 16, upcase: true + it_converts_to_s -1234, "-4D2", base: 16, upcase: true + it_converts_to_s 1234, "YA", base: 36, upcase: true + it_converts_to_s -1234, "-YA", base: 36, upcase: true + it_converts_to_s 0, "0", base: 2 + it_converts_to_s 0, "0", base: 16 + it_converts_to_s 1, "1", base: 2 + it_converts_to_s 1, "1", base: 16 + it_converts_to_s 0, "0", base: 62 + it_converts_to_s 1, "1", base: 62 + it_converts_to_s 10, "a", base: 62 + it_converts_to_s 35, "z", base: 62 + it_converts_to_s 36, "A", base: 62 + it_converts_to_s 61, "Z", base: 62 + it_converts_to_s 62, "10", base: 62 + it_converts_to_s 97, "1z", base: 62 + it_converts_to_s 3843, "ZZ", base: 62 + + it "raises on base 1" do + 
expect_raises(ArgumentError, "Invalid base 1") { 123.to_s(1) } + expect_raises(ArgumentError, "Invalid base 1") { 123.to_s(IO::Memory.new, 1) } + end + + it "raises on base 37" do + expect_raises(ArgumentError, "Invalid base 37") { 123.to_s(37) } + expect_raises(ArgumentError, "Invalid base 37") { 123.to_s(IO::Memory.new, 37) } + end + + it "raises on base 62 with upcase" do + expect_raises(ArgumentError, "upcase must be false for base 62") { 123.to_s(62, upcase: true) } + expect_raises(ArgumentError, "upcase must be false for base 62") { 123.to_s(IO::Memory.new, 62, upcase: true) } + end + end + + context "precision parameter" do + it_converts_to_s 0, "", precision: 0 + it_converts_to_s 0, "0", precision: 1 + it_converts_to_s 0, "00", precision: 2 + it_converts_to_s 0, "00000", precision: 5 + it_converts_to_s 0, "0" * 200, precision: 200 + + it_converts_to_s 1, "1", precision: 0 + it_converts_to_s 1, "1", precision: 1 + it_converts_to_s 1, "01", precision: 2 + it_converts_to_s 1, "00001", precision: 5 + it_converts_to_s 1, "#{"0" * 199}1", precision: 200 + + it_converts_to_s 2, "2", precision: 0 + it_converts_to_s 2, "2", precision: 1 + it_converts_to_s 2, "02", precision: 2 + it_converts_to_s 2, "00002", precision: 5 + it_converts_to_s 2, "#{"0" * 199}2", precision: 200 + + it_converts_to_s -1, "-1", precision: 0 + it_converts_to_s -1, "-1", precision: 1 + it_converts_to_s -1, "-01", precision: 2 + it_converts_to_s -1, "-00001", precision: 5 + it_converts_to_s -1, "-#{"0" * 199}1", precision: 200 + + it_converts_to_s 123, "123", precision: 0 + it_converts_to_s 123, "123", precision: 1 + it_converts_to_s 123, "123", precision: 2 + it_converts_to_s 123, "00123", precision: 5 + it_converts_to_s 123, "#{"0" * 197}123", precision: 200 + + it_converts_to_s 9223372036854775807_i64, "#{"1" * 63}", base: 2, precision: 62 + it_converts_to_s 9223372036854775807_i64, "#{"1" * 63}", base: 2, precision: 63 + it_converts_to_s 9223372036854775807_i64, "0#{"1" * 63}", base: 2, 
precision: 64 + it_converts_to_s 9223372036854775807_i64, "#{"0" * 137}#{"1" * 63}", base: 2, precision: 200 + + it_converts_to_s -9223372036854775808_i64, "-1#{"0" * 63}", base: 2, precision: 63 + it_converts_to_s -9223372036854775808_i64, "-1#{"0" * 63}", base: 2, precision: 64 + it_converts_to_s -9223372036854775808_i64, "-01#{"0" * 63}", base: 2, precision: 65 + it_converts_to_s -9223372036854775808_i64, "-#{"0" * 136}1#{"0" * 63}", base: 2, precision: 200 + + it "raises on negative precision" do + expect_raises(ArgumentError, "Precision must be non-negative") { 123.to_s(precision: -1) } + expect_raises(ArgumentError, "Precision must be non-negative") { 123.to_s(IO::Memory.new, precision: -1) } + end end end @@ -358,54 +396,6 @@ describe "Int" do end end - describe "to_s" do - it "does to_s for various int sizes" do - 0.to_s.should eq("0") - 1.to_s.should eq("1") - - 127_i8.to_s.should eq("127") - -128_i8.to_s.should eq("-128") - - 32767_i16.to_s.should eq("32767") - -32768_i16.to_s.should eq("-32768") - - 2147483647.to_s.should eq("2147483647") - -2147483648.to_s.should eq("-2147483648") - - 9223372036854775807_i64.to_s.should eq("9223372036854775807") - -9223372036854775808_i64.to_s.should eq("-9223372036854775808") - - 255_u8.to_s.should eq("255") - 65535_u16.to_s.should eq("65535") - 4294967295_u32.to_s.should eq("4294967295") - - 18446744073709551615_u64.to_s.should eq("18446744073709551615") - end - - it "does to_s for various int sizes with IO" do - to_s_with_io(0).should eq("0") - to_s_with_io(1).should eq("1") - - to_s_with_io(127_i8).should eq("127") - to_s_with_io(-128_i8).should eq("-128") - - to_s_with_io(32767_i16).should eq("32767") - to_s_with_io(-32768_i16).should eq("-32768") - - to_s_with_io(2147483647).should eq("2147483647") - to_s_with_io(-2147483648).should eq("-2147483648") - - to_s_with_io(9223372036854775807_i64).should eq("9223372036854775807") - to_s_with_io(-9223372036854775808_i64).should eq("-9223372036854775808") - - 
to_s_with_io(255_u8).should eq("255") - to_s_with_io(65535_u16).should eq("65535") - to_s_with_io(4294967295_u32).should eq("4294967295") - - to_s_with_io(18446744073709551615_u64).should eq("18446744073709551615") - end - end - describe "step" do it "steps through limit" do passed = false diff --git a/spec/std/json/pull_parser_spec.cr b/spec/std/json/pull_parser_spec.cr index 48d0b74d0826..c488825185f2 100644 --- a/spec/std/json/pull_parser_spec.cr +++ b/spec/std/json/pull_parser_spec.cr @@ -162,6 +162,10 @@ describe JSON::PullParser do assert_pull_parse_error %({"name": "John", "age", 1}) assert_pull_parse_error %({"name": "John", "age": "foo", "bar"}) + it "parses when the input IO is already empty" do + JSON::PullParser.new(IO::Memory.new).kind.should eq JSON::PullParser::Kind::EOF + end + it "prevents stack overflow for arrays" do parser = JSON::PullParser.new(("[" * 513) + ("]" * 513)) expect_raises JSON::ParseException, "Nesting of 513 is too deep" do diff --git a/spec/std/named_tuple_spec.cr b/spec/std/named_tuple_spec.cr index 6dc6684be827..80e326fa253e 100644 --- a/spec/std/named_tuple_spec.cr +++ b/spec/std/named_tuple_spec.cr @@ -148,7 +148,7 @@ describe "NamedTuple" do typeof(val).should eq(Int32 | Char | Nil) end - describe "dig?" do + describe "#dig?" 
do it "gets the value at given path given splat" do h = {a: {b: {c: [10, 20]}}, x: {a: "b"}} @@ -165,7 +165,7 @@ describe "NamedTuple" do end end - describe "dig" do + describe "#dig" do it "gets the value at given path given splat" do h = {a: {b: {c: [10, 20]}}, x: {a: "b", c: nil}} @@ -290,9 +290,18 @@ describe "NamedTuple" do NamedTuple.new.empty?.should be_true end - it "does to_a" do - tup = {a: 1, b: 'a'} - tup.to_a.should eq([{:a, 1}, {:b, 'a'}]) + describe "#to_a" do + it "creates an array of key-value pairs" do + tup = {a: 1, b: 'a'} + tup.to_a.should eq([{:a, 1}, {:b, 'a'}]) + end + + it "preserves key type for empty named tuples" do + tup = NamedTuple.new + arr = tup.to_a + arr.should be_empty + arr.should be_a(Array({Symbol, NoReturn})) + end end it "does map" do @@ -327,10 +336,19 @@ describe "NamedTuple" do u.should_not eq(v) end - it "does to_h" do - tup1 = {a: 1, b: "hello"} - hash = tup1.to_h - hash.should eq({:a => 1, :b => "hello"}) + describe "#to_h" do + it "creates a hash" do + tup1 = {a: 1, b: "hello"} + hash = tup1.to_h + hash.should eq({:a => 1, :b => "hello"}) + end + + it "creates an empty hash from an empty named tuple" do + tup = NamedTuple.new + hash = tup.to_h + hash.should be_empty + hash.should be_a(Hash(Symbol, NoReturn)) + end end it "does to_s" do diff --git a/spec/std/slice_spec.cr b/spec/std/slice_spec.cr index 86b2a0dcd837..5a1aed451132 100644 --- a/spec/std/slice_spec.cr +++ b/spec/std/slice_spec.cr @@ -148,6 +148,57 @@ describe "Slice" do slice.to_s.should eq("Bytes[1, 2, 3]") end + describe "#fill" do + it "replaces all values, without block" do + slice = Slice.new(4) { |i| i + 1 } + expected = Slice.new(4, 7) + slice.fill(7).should eq(expected) + slice.should eq(expected) + + expected = Slice.new(4, 5) + slice.fill(5).should eq(expected) + slice.should eq(expected) + end + + it "works with primitive number types and 0" do + slice = Slice.new(4) { |i| i + 1 } + expected = Slice.new(4, 0) + slice.fill(0).should eq(expected) 
+ slice.should eq(expected) + + slice = Slice.new(4, &.to_f64) + expected = Slice.new(4, 0.0) + slice.fill(0.0).should eq(expected) + slice.should eq(expected) + + slice = Slice.new(4, &.to_u8) + expected = Slice.new(4, 0_u8) + slice.fill(0).should eq(expected) + slice.should eq(expected) + end + + it "works with Bytes" do + slice = Bytes[1, 2, 3] + expected = Slice.new(3, 7_u8) + slice.fill(7).should eq(expected) + slice.should eq(expected) + end + + it "replaces all values, with block" do + slice = Slice.new(4) { |i| i + 1 } + expected = Slice[0, 1, 4, 9] + slice.fill { |i| i * i }.should eq(expected) + slice.should eq(expected) + end + + it "replaces all values, with block and offset" do + slice = Slice.new(4) { |i| i + 1 } + expected = Slice[9, 16, 25, 36] + slice.fill(offset: 3) { |i| i * i }.should eq(expected) + slice.should eq(expected) + end + end + it "does copy_from pointer" do pointer = Pointer.malloc(4) { |i| i + 1 } slice = Slice.new(4, 0) @@ -507,6 +558,7 @@ describe "Slice" do it "creates read-only slice" do slice = Slice.new(3, 0, read_only: true) expect_raises(Exception, "Can't write to read-only Slice") { slice[0] = 1 } + expect_raises(Exception, "Can't write to read-only Slice") { slice.fill(0) } expect_raises(Exception, "Can't write to read-only Slice") { slice.copy_from(slice) } subslice = slice[0, 1] @@ -580,59 +632,49 @@ describe "Slice" do end describe "sort" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sort without block" do + {% for sort in ["sort".id, "unstable_sort".id] %} + describe {{ "##{sort}" }} do + it "without block" do slice = Slice[3, 4, 1, 2, 5, 6] - sorted_slice = slice.sort(stable: stable) + sorted_slice = slice.{{ sort }} sorted_slice.to_a.should eq([1, 2, 3, 4, 5, 6]) slice.should_not eq(sorted_slice) end - it "sort with a block" do + it "with a block" do a = Slice["foo", "a", "hello"] - b = a.sort(stable: stable) { |x, y| x.size <=> y.size } + b = a.{{ sort }} { |x, y| x.size <=> y.size } 
b.to_a.should eq(["a", "foo", "hello"]) a.should_not eq(b) end - end - end - - it "stable sort without block" do - is_stable_sort(mutable: false, &.sort(stable: true)) - end - it "stable sort with a block" do - is_stable_sort(mutable: false, &.sort(stable: true) { |a, b| a.value <=> b.value }) - end - - it "default is stable (without block)" do - is_stable_sort(mutable: false, &.sort) - end + {% if sort == "sort" %} + it "stable sort without a block" do + is_stable_sort(mutable: false, &.sort) + end - it "default is stable (with a block)" do - is_stable_sort(mutable: false, &.sort { |a, b| a.value <=> b.value }) - end - end + it "stable sort with a block" do + is_stable_sort(mutable: false, &.sort { |a, b| a.value <=> b.value }) + end + {% end %} + end - describe "sort!" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sort! without block" do + describe {{ "##{sort}!" }} do + it "without block" do a = [3, 4, 1, 2, 5, 6] - a.sort!(stable: stable) + a.{{ sort }}! a.should eq([1, 2, 3, 4, 5, 6]) end - it "sort! with a block" do + it "with a block" do a = ["foo", "a", "hello"] - a.sort!(stable: stable) { |x, y| x.size <=> y.size } + a.{{ sort }}! { |x, y| x.size <=> y.size } a.should eq(["a", "foo", "hello"]) end it "sorts with invalid block (#4379)" do a = [1] * 17 - b = a.sort(stable: stable) { -1 } + b = a.{{ sort }} { -1 } a.should eq(b) end @@ -644,7 +686,7 @@ describe "Slice" do Spaceship.new(3), ] - spaceships.sort!(stable: stable) + spaceships.{{ sort }}! 4.times do |i| spaceships[i].value.should eq(i) end @@ -657,81 +699,63 @@ describe "Slice" do ] expect_raises(ArgumentError) do - spaceships.sort!(stable: stable) + spaceships.{{ sort }}! end end it "raises if sort! block returns nil" do expect_raises(ArgumentError) do - Slice[1, 2].sort!(stable: stable) { nil } + Slice[1, 2].{{ sort }}! { nil } end end - end - end - - it "stable sort! without block" do - is_stable_sort(mutable: true, &.sort!(stable: true)) - end - it "stable sort! 
with a block" do - is_stable_sort(mutable: true, &.sort!(stable: true) { |a, b| a.value <=> b.value }) - end - - it "default is stable (without block)" do - is_stable_sort(mutable: true, &.sort!) - end + {% if sort == "sort" %} + it "stable sort without a block" do + is_stable_sort(mutable: true, &.sort!) + end - it "default is stable (with a block)" do - is_stable_sort(mutable: true, &.sort! { |a, b| a.value <=> b.value }) - end - end + it "stable sort with a block" do + is_stable_sort(mutable: true, &.sort! { |a, b| a.value <=> b.value }) + end + {% end %} + end - describe "sort_by" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sorts by" do + describe {{ "##{sort}_by" }} do + it "sorts" do a = Slice["foo", "a", "hello"] - b = a.sort_by(stable: stable, &.size) + b = a.{{ sort }}_by(&.size) b.to_a.should eq(["a", "foo", "hello"]) a.should_not eq(b) end - end - end - it "stable sort by" do - is_stable_sort(mutable: false, &.sort_by(stable: true, &.value)) - end - - it "default is stable" do - is_stable_sort(mutable: false, &.sort_by(&.value)) - end - end + {% if sort == "sort" %} + it "stable sort" do + is_stable_sort(mutable: false, &.sort_by(&.value)) + end + {% end %} + end - describe "sort_by!" do - [true, false].each do |stable| - describe "stable: #{stable}" do - it "sorts by!" do + describe {{ "##{sort}_by!" }} do + it "sorts" do a = Slice["foo", "a", "hello"] - a.sort_by!(stable: stable, &.size) + a.{{ sort }}_by!(&.size) a.to_a.should eq(["a", "foo", "hello"]) end it "calls given block exactly once for each element" do calls = Hash(String, Int32).new(0) a = Slice["foo", "a", "hello"] - a.sort_by!(stable: stable) { |e| calls[e] += 1; e.size } + a.{{ sort }}_by! { |e| calls[e] += 1; e.size } calls.should eq({"foo" => 1, "a" => 1, "hello" => 1}) end - end - end - - it "stable sort by!" 
do - is_stable_sort(mutable: true, &.sort_by!(stable: true, &.value)) - end - it "default is stable" do - is_stable_sort(mutable: true, &.sort_by!(&.value)) - end + {% if sort == "sort" %} + it "stable sort" do + is_stable_sort(mutable: true, &.sort_by!(&.value)) + end + {% end %} + end + {% end %} end describe "<=>" do diff --git a/spec/std/socket/addrinfo_spec.cr b/spec/std/socket/addrinfo_spec.cr index b197e50679ac..96024ca0cfc2 100644 --- a/spec/std/socket/addrinfo_spec.cr +++ b/spec/std/socket/addrinfo_spec.cr @@ -1,7 +1,7 @@ require "spec" require "socket/addrinfo" -describe Socket::Addrinfo do +describe Socket::Addrinfo, tags: "network" do describe ".resolve" do it "returns an array" do addrinfos = Socket::Addrinfo.resolve("localhost", 80, type: Socket::Type::STREAM) diff --git a/spec/std/socket/socket_spec.cr b/spec/std/socket/socket_spec.cr index 98617d81abc7..86ed83d2bbc9 100644 --- a/spec/std/socket/socket_spec.cr +++ b/spec/std/socket/socket_spec.cr @@ -1,7 +1,7 @@ require "./spec_helper" require "../../support/tempfile" -describe Socket do +describe Socket, tags: "network" do describe ".unix" do it "creates a unix socket" do sock = Socket.unix diff --git a/spec/std/socket/tcp_server_spec.cr b/spec/std/socket/tcp_server_spec.cr index fe38253890af..3b8a153e2a31 100644 --- a/spec/std/socket/tcp_server_spec.cr +++ b/spec/std/socket/tcp_server_spec.cr @@ -1,6 +1,6 @@ require "./spec_helper" -describe TCPServer do +describe TCPServer, tags: "network" do describe ".new" do each_ip_family do |family, address| it "listens on local address" do diff --git a/spec/std/socket/tcp_socket_spec.cr b/spec/std/socket/tcp_socket_spec.cr index 8b77c8afb90c..c28983a22b1a 100644 --- a/spec/std/socket/tcp_socket_spec.cr +++ b/spec/std/socket/tcp_socket_spec.cr @@ -1,6 +1,6 @@ require "./spec_helper" -describe TCPSocket do +describe TCPSocket, tags: "network" do describe "#connect" do each_ip_family do |family, address| it "connects to server" do diff --git 
a/spec/std/socket/udp_socket_spec.cr b/spec/std/socket/udp_socket_spec.cr index 48cb5c394d6a..074369a0a61c 100644 --- a/spec/std/socket/udp_socket_spec.cr +++ b/spec/std/socket/udp_socket_spec.cr @@ -1,7 +1,7 @@ require "./spec_helper" require "socket" -describe UDPSocket do +describe UDPSocket, tags: "network" do # Note: This spec fails with a IPv6 address. See pending below. it "#remote_address resets after connect" do socket = UDPSocket.new diff --git a/spec/std/static_array_spec.cr b/spec/std/static_array_spec.cr index 0bb01832dc65..4c359cdebd57 100644 --- a/spec/std/static_array_spec.cr +++ b/spec/std/static_array_spec.cr @@ -80,18 +80,24 @@ describe "StaticArray" do a.to_s.should eq("StaticArray[1, 2, 3]") end - it "does #fill, without block" do - a = StaticArray(Int32, 3).new { |i| i + 1 } - a.fill(0).should eq(StaticArray[0, 0, 0]) - a.should eq(StaticArray[0, 0, 0]) - a.fill(2).should eq(StaticArray[2, 2, 2]) - a.should eq(StaticArray[2, 2, 2]) - end + describe "#fill" do + it "replaces all values, without block" do + a = StaticArray(Int32, 3).new { |i| i + 1 } + expected = StaticArray[0, 0, 0] + a.fill(0).should eq(expected) + a.should eq(expected) + + expected = StaticArray[2, 2, 2] + a.fill(2).should eq(expected) + a.should eq(expected) + end - it "does #fill, with block" do - a = StaticArray(Int32, 4).new { |i| i + 1 } - a.fill { |i| i * i }.should eq(StaticArray[0, 1, 4, 9]) - a.should eq(StaticArray[0, 1, 4, 9]) + it "replaces all values, with block" do + a = StaticArray(Int32, 4).new { |i| i + 1 } + expected = StaticArray[0, 1, 4, 9] + a.fill { |i| i * i }.should eq(expected) + a.should eq(expected) + end end it "shuffles" do diff --git a/spec/std/xml/xml_spec.cr b/spec/std/xml/xml_spec.cr index f1c3e684f8e4..a2a6429db395 100644 --- a/spec/std/xml/xml_spec.cr +++ b/spec/std/xml/xml_spec.cr @@ -168,45 +168,198 @@ describe XML do errors[0].to_s.should eq("Opening and ending tag mismatch: people line 1 and foo") end - it "gets root namespaces scopes" 
do - doc = XML.parse(<<-XML - - - - XML - ) - namespaces = doc.root.not_nil!.namespace_scopes + describe "#namespace" do + describe "when the node has a namespace" do + describe "with a prefix" do + it "return the prefixed namespace" do + doc = XML.parse(<<-XML) + + + XML + + namespace = doc.root.not_nil!.namespace.should be_a XML::Namespace + namespace.href.should eq "http://a9.com/-/spec/opensearchrss/1.0/" + namespace.prefix.should eq "openSearch" + end + end + + describe "with a default prefix" do + it "return the default namespace" do + doc = XML.parse(<<-XML) + + + XML + + namespace = doc.root.not_nil!.namespace.should be_a XML::Namespace + namespace.href.should eq "http://a9.com/-/spec/opensearchrss/1.0/" + namespace.prefix.should be_nil + end + end + + describe "without an explicit declaration on the node" do + it "returns the related namespace" do + doc = XML.parse(<<-XML) + + + + + + XML + + root = doc.root.not_nil! + + namespace = root.children[1].namespace.should be_a XML::Namespace + namespace.href.should eq "http://www.w3.org/2005/Atom" + namespace.prefix.should be_nil + + namespace = root.children[3].namespace.should be_a XML::Namespace + namespace.href.should eq "https://a-namespace" + namespace.prefix.should eq "a" + end + end + end + + describe "when the node does not have namespace" do + it "should return nil" do + doc = XML.parse(<<-XML) + + + XML + + doc.root.not_nil!.namespace.should be_nil + end + end - namespaces.size.should eq(2) - namespaces[0].href.should eq("http://www.w3.org/2005/Atom") - namespaces[0].prefix.should be_nil - namespaces[1].href.should eq("http://a9.com/-/spec/opensearchrss/1.0/") - namespaces[1].prefix.should eq("openSearch") + describe "when the element does not have a namespace, but has namespace declarations" do + it "should return nil" do + doc = XML.parse(<<-XML) + + + XML + + doc.root.not_nil!.namespace.should be_nil + end + end end - it "returns empty array if no namespaces scopes exists" do - doc = 
XML.parse(<<-XML - - John - XML - ) - namespaces = doc.root.not_nil!.namespace_scopes + describe "#namespace_definitions" do + it "returns namespaces explicitly defined" do + doc = XML.parse(<<-XML) + + + + + XML - namespaces.size.should eq(0) + namespaces = doc.root.not_nil!.first_element_child.not_nil!.namespace_definitions + + namespaces.size.should eq(1) + namespaces[0].href.should eq("http://c") + namespaces[0].prefix.should eq "c" + end + + it "returns an empty array if no namespaces are defined" do + doc = XML.parse(<<-XML) + + + + + XML + + doc.root.not_nil!.first_element_child.not_nil!.namespace_definitions.should be_empty + end end - it "gets root namespaces as hash" do - doc = XML.parse(<<-XML - - - - XML - ) - namespaces = doc.root.not_nil!.namespaces - namespaces.should eq({ - "xmlns" => "http://www.w3.org/2005/Atom", - "xmlns:openSearch": "http://a9.com/-/spec/opensearchrss/1.0/", - }) + describe "#namespace_scopes" do + it "gets root namespaces scopes" do + doc = XML.parse(<<-XML) + + + + XML + + namespaces = doc.root.not_nil!.namespace_scopes + + namespaces.size.should eq(2) + namespaces[0].href.should eq("http://www.w3.org/2005/Atom") + namespaces[0].prefix.should be_nil + namespaces[1].href.should eq("http://a9.com/-/spec/opensearchrss/1.0/") + namespaces[1].prefix.should eq("openSearch") + end + + it "returns empty array if no namespaces scopes exists" do + doc = XML.parse(<<-XML) + + John + XML + + namespaces = doc.root.not_nil!.namespace_scopes + + namespaces.size.should eq(0) + end + + it "includes parent namespaces" do + doc = XML.parse(<<-XML) + + + + + XML + + namespaces = doc.root.not_nil!.first_element_child.not_nil!.namespace_scopes + + namespaces.size.should eq(3) + namespaces[0].href.should eq("http://c") + namespaces[0].prefix.should eq "c" + namespaces[1].href.should eq("http://www.w3.org/2005/Atom") + namespaces[1].prefix.should be_nil + namespaces[2].href.should eq("http://a9.com/-/spec/opensearchrss/1.0/") + 
namespaces[2].prefix.should eq("openSearch") + end + end + + describe "#namespaces" do + it "gets root namespaces as hash" do + doc = XML.parse(<<-XML) + + + + XML + + namespaces = doc.root.not_nil!.namespaces + namespaces.should eq({ + "xmlns" => "http://www.w3.org/2005/Atom", + "xmlns:openSearch" => "http://a9.com/-/spec/opensearchrss/1.0/", + }) + end + + it "includes parent namespaces" do + doc = XML.parse(<<-XML) + + + + + XML + + namespaces = doc.root.not_nil!.first_element_child.not_nil!.namespaces + namespaces.should eq({ + "xmlns:c" => "http://c", + "xmlns" => "http://www.w3.org/2005/Atom", + "xmlns:openSearch" => "http://a9.com/-/spec/opensearchrss/1.0/", + }) + end + + it "returns an empty hash if there are no namespaces" do + doc = XML.parse(<<-XML) + + + + + XML + + namespaces = doc.root.not_nil!.first_element_child.not_nil!.namespaces + namespaces.should eq({} of String => String?) + end end it "reads big xml file (#1455)" do @@ -217,11 +370,11 @@ describe XML do end it "sets node text/content" do - doc = XML.parse(<<-XML + doc = XML.parse(<<-XML) John XML - ) + root = doc.root.not_nil! root.text = "Peter" root.text.should eq("Peter") @@ -231,11 +384,11 @@ describe XML do end it "doesn't set invalid node content" do - doc = XML.parse(<<-XML + doc = XML.parse(<<-XML) John XML - ) + root = doc.root.not_nil! 
expect_raises(Exception, "Cannot escape") do root.content = "\0" diff --git a/src/array.cr b/src/array.cr index 066077c5ecff..30b9eb51057d 100644 --- a/src/array.cr +++ b/src/array.cr @@ -883,9 +883,8 @@ class Array(T) # a = [1, 2, 3, 4] # a.fill { |i| i * i } # => [0, 1, 4, 9] # ``` - def fill - each_index { |i| @buffer[i] = yield i } - + def fill(& : Int32 -> T) : self + to_unsafe_slice.fill { |i| yield i } self end @@ -900,12 +899,12 @@ class Array(T) # a = [1, 2, 3, 4] # a.fill(2) { |i| i * i } # => [1, 2, 4, 9] # ``` - def fill(from : Int) + def fill(from : Int, & : Int32 -> T) : self from += size if from < 0 raise IndexError.new unless 0 <= from < size - from.upto(size - 1) { |i| @buffer[i] = yield i } + to_unsafe_slice(from, size - from).fill(offset: from) { |i| yield i } self end @@ -923,14 +922,14 @@ class Array(T) # a = [1, 2, 3, 4, 5, 6] # a.fill(2, 2) { |i| i * i } # => [1, 2, 4, 9, 5, 6] # ``` - def fill(from : Int, count : Int) + def fill(from : Int, count : Int, & : Int32 -> T) : self return self if count <= 0 from += size if from < 0 raise IndexError.new unless 0 <= from < size && from + count <= size - from.upto(from + count - 1) { |i| @buffer[i] = yield i } + to_unsafe_slice(from, count).fill(offset: from) { |i| yield i } self end @@ -942,7 +941,7 @@ class Array(T) # a = [1, 2, 3, 4, 5, 6] # a.fill(2..3) { |i| i * i } # => [1, 2, 4, 9, 5, 6] # ``` - def fill(range : Range) + def fill(range : Range, & : Int32 -> T) : self fill(*Indexable.range_to_index_and_count(range, size) || raise IndexError.new) do |i| yield i end @@ -954,15 +953,9 @@ class Array(T) # a = [1, 2, 3] # a.fill(9) # => [9, 9, 9] # ``` - def fill(value : T) - {% if Number::Primitive.union_types.includes?(T) %} - if value == 0 - to_unsafe.clear(size) - return self - end - {% end %} - - fill { value } + def fill(value : T) : self + to_unsafe_slice.fill(value) + self end # Replaces every element in `self`, starting at *from*, with the given *value*. Returns `self`. 
@@ -973,22 +966,14 @@ class Array(T) # a = [1, 2, 3, 4, 5] # a.fill(9, 2) # => [1, 2, 9, 9, 9] # ``` - def fill(value : T, from : Int) - {% if Number::Primitive.union_types.includes?(T) %} - if value == 0 - from += size if from < 0 + def fill(value : T, from : Int) : self + from += size if from < 0 - raise IndexError.new unless 0 <= from < size + raise IndexError.new unless 0 <= from < size - (to_unsafe + from).clear(size - from) + to_unsafe_slice(from, size - from).fill(value) - self - else - fill(from) { value } - end - {% else %} - fill(from) { value } - {% end %} + self end # Replaces every element in `self`, starting at *from* and only *count* times, @@ -1000,24 +985,16 @@ class Array(T) # a = [1, 2, 3, 4, 5] # a.fill(9, 2, 2) # => [1, 2, 9, 9, 5] # ``` - def fill(value : T, from : Int, count : Int) - {% if Number::Primitive.union_types.includes?(T) %} - if value == 0 - return self if count <= 0 + def fill(value : T, from : Int, count : Int) : self + return self if count <= 0 - from += size if from < 0 + from += size if from < 0 - raise IndexError.new unless 0 <= from < size && from + count <= size + raise IndexError.new unless 0 <= from < size && from + count <= size - (to_unsafe + from).clear(count) + to_unsafe_slice(from, count).fill(value) - self - else - fill(from, count) { value } - end - {% else %} - fill(from, count) { value } - {% end %} + self end # Replaces every element in *range* with *value*. Returns `self`. 
@@ -1028,18 +1005,8 @@ class Array(T) # a = [1, 2, 3, 4, 5] # a.fill(9, 2..3) # => [1, 2, 9, 9, 5] # ``` - def fill(value : T, range : Range) - {% if Number::Primitive.union_types.includes?(T) %} - if value == 0 - fill(value, *Indexable.range_to_index_and_count(range, size) || raise IndexError.new) - - self - else - fill(range) { value } - end - {% else %} - fill(range) { value } - {% end %} + def fill(value : T, range : Range) : self + fill(value, *Indexable.range_to_index_and_count(range, size) || raise IndexError.new) end # Returns the first *n* elements of the array. @@ -1279,7 +1246,7 @@ class Array(T) end def self.product(arrays : Array(Array)) - result = [] of Array(typeof(arrays.first.first)) + result = [] of Array(typeof(Enumerable.element_type Enumerable.element_type arrays)) each_product(arrays) do |product| result << product end @@ -1503,7 +1470,7 @@ class Array(T) # Reverses in-place all the elements of `self`. def reverse! - Slice.new(@buffer, size).reverse! + to_unsafe_slice.reverse! self end @@ -1704,8 +1671,16 @@ class Array(T) # a.sort # => [1, 2, 3] # a # => [3, 1, 2] # ``` - def sort(*, stable : Bool = true) : Array(T) - dup.sort!(stable: stable) + def sort : Array(T) + dup.sort! + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort : Array(T) + dup.unstable_sort! end # Returns a new array with all elements sorted based on the comparator in the @@ -1722,12 +1697,24 @@ class Array(T) # b # => [3, 2, 1] # a # => [3, 1, 2] # ``` - def sort(*, stable : Bool = true, &block : T, T -> U) : Array(T) forall U + def sort(&block : T, T -> U) : Array(T) forall U + {% unless U <= Int32? %} + {% raise "expected block to return Int32 or Nil, not #{U}" %} + {% end %} + + dup.sort! &block + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. 
+ # Which results in a performance advantage over stable sort. + def unstable_sort(&block : T, T -> U) : Array(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - dup.sort!(stable: stable, &block) + dup.unstable_sort!(&block) end # Modifies `self` by sorting all elements based on the return value of their @@ -1738,8 +1725,17 @@ class Array(T) # a.sort! # a # => [1, 2, 3] # ``` - def sort!(*, stable : Bool = true) : Array(T) - Slice.new(to_unsafe, size).sort!(stable: stable) + def sort! : Array(T) + to_unsafe_slice.sort! + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort! : Array(T) + to_unsafe_slice.unstable_sort! self end @@ -1756,12 +1752,25 @@ class Array(T) # a.sort! { |a, b| b <=> a } # a # => [3, 2, 1] # ``` - def sort!(*, stable : Bool = true, &block : T, T -> U) : Array(T) forall U + def sort!(&block : T, T -> U) : Array(T) forall U + {% unless U <= Int32? %} + {% raise "expected block to return Int32 or Nil, not #{U}" %} + {% end %} + + to_unsafe_slice.sort!(&block) + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort!(&block : T, T -> U) : Array(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - Slice.new(to_unsafe, size).sort!(stable: stable, &block) + to_unsafe_slice.unstable_sort!(&block) self end @@ -1775,8 +1784,16 @@ class Array(T) # b # => ["fig", "pear", "apple"] # a # => ["apple", "pear", "fig"] # ``` - def sort_by(*, stable : Bool = true, &block : T -> _) : Array(T) - dup.sort_by!(stable: stable) { |e| yield(e) } + def sort_by(&block : T -> _) : Array(T) + dup.sort_by! 
{ |e| yield(e) } + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort_by(&block : T -> _) : Array(T) + dup.unstable_sort_by! { |e| yield(e) } end # Modifies `self` by sorting all elements. The given block is called for @@ -1788,8 +1805,20 @@ class Array(T) # a.sort_by! { |word| word.size } # a # => ["fig", "pear", "apple"] # ``` - def sort_by!(*, stable : Bool = true, &block : T -> _) : Array(T) - sorted = map { |e| {e, yield(e)} }.sort!(stable: stable) { |x, y| x[1] <=> y[1] } + def sort_by!(&block : T -> _) : Array(T) + sorted = map { |e| {e, yield(e)} }.sort! { |x, y| x[1] <=> y[1] } + @size.times do |i| + @buffer[i] = sorted.to_unsafe[i][0] + end + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort_by!(&block : T -> _) : Array(T) + sorted = map { |e| {e, yield(e)} }.unstable_sort! { |x, y| x[1] <=> y[1] } @size.times do |i| @buffer[i] = sorted.to_unsafe[i][0] end @@ -1857,7 +1886,7 @@ class Array(T) # a # => [[:a, :b], [:c, :d], [:e, :f]] # ``` def transpose - return Array(Array(typeof(first.first))).new if empty? + return Array(Array(typeof(Enumerable.element_type Enumerable.element_type self))).new if empty? 
len = self[0].size (1...@size).each do |i| @@ -1865,8 +1894,8 @@ class Array(T) raise IndexError.new if len != l end - Array(Array(typeof(first.first))).new(len) do |i| - Array(typeof(first.first)).new(@size) do |j| + Array(Array(typeof(Enumerable.element_type Enumerable.element_type self))).new(len) do |i| + Array(typeof(Enumerable.element_type Enumerable.element_type self)).new(@size) do |j| self[j][i] end end @@ -2217,6 +2246,15 @@ class Array(T) @offset_to_buffer = 0 end + private def to_unsafe_slice + Slice.new(@buffer, size) + end + + private def to_unsafe_slice(index : Int, count : Int) + index, count = normalize_start_and_count(index, count) + Slice.new(@buffer + index, count) + end + protected def to_lookup_hash to_lookup_hash { |elem| elem } end @@ -2235,7 +2273,7 @@ class Array(T) # Optimize for the case of looking for a byte in a byte slice if T.is_a?(UInt8.class) && (object.is_a?(UInt8) || (object.is_a?(Int) && 0 <= object < 256)) - return Slice.new(to_unsafe, size).fast_index(object, offset) + return to_unsafe_slice.fast_index(object, offset) end super diff --git a/src/benchmark/ips.cr b/src/benchmark/ips.cr index 197eb0968366..47229a1536a1 100644 --- a/src/benchmark/ips.cr +++ b/src/benchmark/ips.cr @@ -172,12 +172,12 @@ module Benchmark cycles.times { action.call } end - def set_cycles(duration, iterations) + def set_cycles(duration, iterations) : Nil @cycles = (iterations / duration.total_milliseconds * 100).to_i @cycles = 1 if cycles <= 0 end - def calculate_stats(samples) + def calculate_stats(samples) : Nil @ran = true @size = samples.size @mean = samples.sum.to_f / size.to_f diff --git a/src/big/big_int.cr b/src/big/big_int.cr index c5625d6a1042..295c42263258 100644 --- a/src/big/big_int.cr +++ b/src/big/big_int.cr @@ -410,36 +410,129 @@ struct BigInt < Int # TODO: check hash equality for numbers >= 2**63 def_hash to_i64! - # Returns a string representation of self. 
- # - # ``` - # require "big" - # - # BigInt.new("123456789101101987654321").to_s # => 123456789101101987654321 - # ``` - def to_s : String - String.new(to_cstr) + def to_s(base : Int = 10, *, precision : Int = 1, upcase : Bool = false) : String + raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 || base == 62 + raise ArgumentError.new("upcase must be false for base 62") if upcase && base == 62 + raise ArgumentError.new("Precision must be non-negative") unless precision >= 0 + + case {self, precision} + when {0, 0} + "" + when {0, 1} + "0" + when {1, 1} + "1" + else + count = LibGMP.sizeinbase(self, base).to_i + negative = self < 0 + + if precision <= count + len = count + (negative ? 1 : 0) + String.new(len + 1) do |buffer| # null terminator required by GMP + buffer[len - 1] = 0 + LibGMP.get_str(buffer, upcase ? -base : base, self) + + # `sizeinbase` may be 1 greater than the exact value + if buffer[len - 1] == 0 + if precision == count + # In this case the exact `count` is `precision - 1`, i.e. one zero + # should be inserted at the beginning of the number + # e.g. precision = 3, count = 3, exact count = 2 + # "85\0\0" -> "085\0" for positive + # "-85\0\0" -> "-085\0" for negative + start = buffer + (negative ? 1 : 0) + start.move_to(start + 1, count - 1) + start.value = '0'.ord.to_u8 + else + len -= 1 + end + end + + base62_swapcase(Slice.new(buffer, len)) if base == 62 + {len, len} + end + else + len = precision + (negative ? 1 : 0) + String.new(len + 1) do |buffer| + # e.g. precision = 13, count = 8 + # "_____12345678\0" for positive + # "_____-12345678\0" for negative + buffer[len - 1] = 0 + start = buffer + precision - count + LibGMP.get_str(start, upcase ? -base : base, self) + + # `sizeinbase` may be 1 greater than the exact value + if buffer[len - 1] == 0 + # e.g. 
precision = 7, count = 3, exact count = 2 + # "____85\0\0" -> "____885\0" for positive + # "____-85\0\0" -> "____-885\0" for negative + # `start` will be zero-filled later + count -= 1 + start += 1 if negative + start.move_to(start + 1, count) + end + + base62_swapcase(Slice.new(buffer + len - count, count)) if base == 62 + + if negative + buffer.value = '-'.ord.to_u8 + buffer += 1 + end + Slice.new(buffer, precision - count).fill('0'.ord.to_u8) + + {len, len} + end + end + end end - # :ditto: - def to_s(io : IO) : Nil - str = to_cstr - io.write_utf8 Slice.new(str, LibC.strlen(str)) + def to_s(io : IO, base : Int = 10, *, precision : Int = 1, upcase : Bool = false) : Nil + raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 || base == 62 + raise ArgumentError.new("upcase must be false for base 62") if upcase && base == 62 + raise ArgumentError.new("Precision must be non-negative") unless precision >= 0 + + case {self, precision} + when {0, 0} + # do nothing + when {0, 1} + io << '0' + when {1, 1} + io << '1' + else + count = LibGMP.sizeinbase(self, base).to_i + ptr = LibGMP.get_str(nil, upcase ? -base : base, self) + negative = self < 0 + + # `sizeinbase` may be 1 greater than the exact value + count -= 1 if ptr[count + (negative ? 0 : -1)] == 0 + + if precision <= count + buffer = Slice.new(ptr, count + (negative ? 1 : 0)) + else + if negative + io << '-' + ptr += 1 # this becomes the absolute value + end + + (precision - count).times { io << '0' } + buffer = Slice.new(ptr, count) + end + + base62_swapcase(buffer) if base == 62 + io.write_string buffer + end end - # Returns a string containing the representation of big radix base (2 through 36). 
- # - # ``` - # require "big" - # - # BigInt.new("123456789101101987654321").to_s(8) # => "32111154373025463465765261" - # BigInt.new("123456789101101987654321").to_s(16) # => "1a249b1f61599cd7eab1" - # BigInt.new("123456789101101987654321").to_s(36) # => "k3qmt029k48nmpd" - # ``` - def to_s(base : Int) : String - raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 - cstr = LibGMP.get_str(nil, base, self) - String.new(cstr) + private def base62_swapcase(buffer) + buffer.map! do |x| + # for ASCII integers as returned by GMP the only possible characters are + # '\0', '-', '0'..'9', 'A'..'Z', and 'a'..'z' + if x & 0x40 != 0 # 'A'..'Z', 'a'..'z' + x ^ 0x20 + else # '\0', '-', '0'..'9' + x + end + end end # :nodoc: @@ -606,10 +699,6 @@ struct BigInt < Int pointerof(@mpz) end - private def to_cstr - LibGMP.get_str(nil, 10, mpz) - end - def to_unsafe mpz end diff --git a/src/big/big_rational.cr b/src/big/big_rational.cr index e8cc2008999b..a2a3e884ede8 100644 --- a/src/big/big_rational.cr +++ b/src/big/big_rational.cr @@ -259,7 +259,7 @@ struct BigRational < Number def to_s(io : IO, base : Int = 10) : Nil str = to_cstr(base) - io.write_utf8 Slice.new(str, LibC.strlen(str)) + io.write_string Slice.new(str, LibC.strlen(str)) end def inspect : String diff --git a/src/big/json.cr b/src/big/json.cr index 44d4ba6726dc..26682c6e3698 100644 --- a/src/big/json.cr +++ b/src/big/json.cr @@ -3,7 +3,7 @@ require "big" class JSON::Builder # Writes a big decimal. 
- def number(number : BigDecimal) + def number(number : BigDecimal) : Nil scalar do @io << number end @@ -26,7 +26,7 @@ struct BigInt to_s end - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.number(self) end end @@ -56,7 +56,7 @@ struct BigFloat to_s end - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.number(self) end end @@ -86,7 +86,7 @@ struct BigDecimal to_s end - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.number(self) end end diff --git a/src/bit_array.cr b/src/bit_array.cr index 414f3159483a..bab6764eae51 100644 --- a/src/bit_array.cr +++ b/src/bit_array.cr @@ -121,7 +121,7 @@ struct BitArray bits = @bits[0] bits >>= start - bits &= (1 << count) - 1 + bits &= ~(UInt32::MAX << count) BitArray.new(count).tap { |ba| ba.@bits[0] = bits } elsif size <= 64 @@ -129,10 +129,10 @@ struct BitArray bits = @bits.as(UInt64*)[0] bits >>= start - bits &= (1 << count) - 1 + bits &= ~(UInt64::MAX << count) if count <= 32 - BitArray.new(count).tap { |ba| ba.@bits[0] = bits.to_u32 } + BitArray.new(count).tap { |ba| ba.@bits[0] = bits.to_u32! } else BitArray.new(count).tap { |ba| ba.@bits.as(UInt64*)[0] = bits } end @@ -150,7 +150,7 @@ struct BitArray bits = @bits[start_bit_index + i + 1] high_bits = bits - high_bits &= (1 << start_sub_index) - 1 + high_bits &= ~(UInt32::MAX << start_sub_index) high_bits <<= 32 - start_sub_index ba.@bits[i] = low_bits | high_bits @@ -164,10 +164,13 @@ struct BitArray end end - # Toggles the bit at the given *index*. A false bit becomes a `true` bit, and - # vice versa. - # Negative indices can be used to start counting from the end of the array. - # Raises `IndexError` if trying to access a bit outside the array's range. + # Toggles the bit at the given *index*. A `false` bit becomes a `true` bit, + # and vice versa. + # + # Negative indices count backward from the end of the array (-1 is the last + # element). 
+ # + # Raises `IndexError` if *index* is out of range. # # ``` # require "bit_array" @@ -177,11 +180,72 @@ struct BitArray # ba.toggle(3) # ba[3] # => true # ``` - def toggle(index) + def toggle(index) : Nil bit_index, sub_index = bit_index_and_sub_index(index) @bits[bit_index] ^= 1 << sub_index end + # Toggles all bits that are within the given *range*. A `false` bit becomes a + # `true` bit, and vice versa. + # + # Negative indices count backward from the end of the array (-1 is the last + # element). + # + # Raises `IndexError` if the starting index is out of range. + # + # ``` + # require "bit_array" + # + # ba = BitArray.new(5) + # ba.to_s # => "BitArray[00000]" + # ba.toggle(1..-2) + # ba.to_s # => "BitArray[01110]" + # ``` + def toggle(range : Range) + toggle(*Indexable.range_to_index_and_count(range, size) || raise IndexError.new) + end + + # Toggles *count* or less (if there aren't enough) bits starting at the given + # *start* index. A `false` bit becomes a `true` bit, and vice versa. + # + # Negative indices count backward from the end of the array (-1 is the last + # element). + # + # Raises `IndexError` if *index* is out of range. + # Raises `ArgumentError` if *count* is a negative number. 
+ # + # ``` + # require "bit_array" + # + # ba = BitArray.new(5) + # ba.to_s # => "BitArray[00000]" + # ba.toggle(1, 3) + # ba.to_s # => "BitArray[01110]" + # ``` + def toggle(start : Int, count : Int) + start, count = normalize_start_and_count(start, count) + + start_bit_index, start_sub_index = start.divmod(32) + end_bit_index, end_sub_index = (start + count - 1).divmod(32) + + if start_bit_index == end_bit_index + # same UInt32, don't perform the loop at all + @bits[start_bit_index] ^= uint32_mask(start_sub_index, end_sub_index) + else + @bits[start_bit_index] ^= uint32_mask(start_sub_index, 31) + (start_bit_index + 1..end_bit_index - 1).each do |i| + @bits[i] = ~@bits[i] + end + @bits[end_bit_index] ^= uint32_mask(0, end_sub_index) + end + end + + # returns (1 << from) | (1 << (from + 1)) | ... | (1 << to) + @[AlwaysInline] + private def uint32_mask(from, to) + (Int32::MIN >> (to - from)).to_u32! >> (31 - to) + end + # Inverts all bits in the array. Falses become `true` and vice versa. # # ``` @@ -193,7 +257,7 @@ struct BitArray # ba.invert # ba # => BitArray[11001] # ``` - def invert + def invert : Nil malloc_size.times do |i| @bits[i] = ~@bits[i] end @@ -259,7 +323,7 @@ struct BitArray protected def clear_unused_bits # There are no unused bits if `size` is a multiple of 32. bit_index, sub_index = @size.divmod(32) - @bits[bit_index] &= (1 << sub_index) - 1 unless sub_index == 0 + @bits[bit_index] &= ~(UInt32::MAX << sub_index) unless sub_index == 0 end private def bytesize diff --git a/src/char.cr b/src/char.cr index f081318335df..319de3188e73 100644 --- a/src/char.cr +++ b/src/char.cr @@ -125,8 +125,8 @@ struct Char # Performs a `#step` in the direction of the _limit_. 
For instance: # # ``` - # 'd'.step(to: 'a').to_a # => ['d', 'c', 'b', 'a'] - # 'a'.step(to: 'd').to_a # => ['a', 'b', 'c', 'd'] + # 'd'.step(to: 'a').to_a # => ['d', 'c', 'b', 'a'] + # 'a'.step(to: 'd').to_a # => ['a', 'b', 'c', 'd'] # ``` def step(*, to limit = nil, exclusive : Bool = false, &) if limit @@ -799,7 +799,7 @@ struct Char # Optimization: writing a slice is much slower than writing a byte if io.has_non_utf8_encoding? - io.write_utf8 Slice.new(pointerof(byte), 1) + io.write_string Slice.new(pointerof(byte), 1) else io.write_byte byte end @@ -810,7 +810,7 @@ struct Char chars[i] = byte i += 1 end - io.write_utf8 chars.to_slice[0, i] + io.write_string chars.to_slice[0, i] end end diff --git a/src/compiler/crystal/codegen/call.cr b/src/compiler/crystal/codegen/call.cr index 4523c7a965ef..22366b095874 100644 --- a/src/compiler/crystal/codegen/call.cr +++ b/src/compiler/crystal/codegen/call.cr @@ -586,12 +586,12 @@ class Crystal::CodeGenVisitor abi_arg_type = abi_info.arg_types[i]? if abi_arg_type && (attr = abi_arg_type.attr) - @last.add_instruction_attribute(i + arg_offset, attr, llvm_context) + @last.add_instruction_attribute(i + arg_offset, attr, llvm_context, abi_arg_type.type) end end if sret - @last.add_instruction_attribute(1, LLVM::Attribute::StructRet, llvm_context) + @last.add_instruction_attribute(1, LLVM::Attribute::StructRet, llvm_context, abi_info.return_type.type) end end @@ -605,7 +605,7 @@ class Crystal::CodeGenVisitor arg_types = fun_type.try(&.arg_types) || target_def.try &.args.map &.type arg_types.try &.each_with_index do |arg_type, i| if abi_info && (abi_arg_type = abi_info.arg_types[i]?) 
&& (attr = abi_arg_type.attr) - @last.add_instruction_attribute(i + arg_offset, attr, llvm_context) + @last.add_instruction_attribute(i + arg_offset, attr, llvm_context, abi_arg_type.type) end end end diff --git a/src/compiler/crystal/codegen/class_var.cr b/src/compiler/crystal/codegen/class_var.cr index b9573b9d54af..d2979dcf161d 100644 --- a/src/compiler/crystal/codegen/class_var.cr +++ b/src/compiler/crystal/codegen/class_var.cr @@ -92,6 +92,7 @@ class Crystal::CodeGenVisitor global = declare_class_var(class_var) global = ensure_class_var_in_this_module(global, class_var) if init_func + set_current_debug_location initializer.node if @debug.line_numbers? call init_func end return global @@ -121,6 +122,8 @@ class Crystal::CodeGenVisitor discard = false new_func = in_main do define_main_function(init_function_name, ([] of LLVM::Type), llvm_context.void, needs_alloca: true) do |func| + set_internal_fun_debug_location(func, init_function_name, node.location) + with_cloned_context do # "self" in a constant is the class_var owner context.type = class_var.owner @@ -223,6 +226,8 @@ class Crystal::CodeGenVisitor def create_read_virtual_class_var_ptr_function(fun_name, class_var, owner) in_main do define_main_function(fun_name, [llvm_context.int32], llvm_type(class_var.type).pointer) do |func| + set_internal_fun_debug_location(func, fun_name) + self_type_id = func.params[0] cmp = equal?(self_type_id, type_id(owner.base_type)) @@ -268,6 +273,8 @@ class Crystal::CodeGenVisitor def create_read_virtual_metaclass_var_ptr_function(fun_name, class_var, owner) in_main do define_main_function(fun_name, [llvm_context.int32], llvm_type(class_var.type).pointer) do |func| + set_internal_fun_debug_location(func, fun_name) + self_type_id = func.params[0] cmp = equal?(self_type_id, type_id(owner.base_type.metaclass)) @@ -313,6 +320,7 @@ class Crystal::CodeGenVisitor in_main do define_main_function(fun_name, ([] of LLVM::Type), llvm_type(class_var.type).pointer) do |func| + 
set_internal_fun_debug_location(func, fun_name, initializer.node.location) init_func = check_main_fun init_func.name, init_func ret lazy_initialize_class_var(initializer.node, init_func, global, initialized_flag) end diff --git a/src/compiler/crystal/codegen/codegen.cr b/src/compiler/crystal/codegen/codegen.cr index 2e2610b483b6..bcd0932f31b1 100644 --- a/src/compiler/crystal/codegen/codegen.cr +++ b/src/compiler/crystal/codegen/codegen.cr @@ -189,8 +189,6 @@ module Crystal @personality_name = "__crystal_personality" end - emit_main_def_debug_metadata(@main, "??") unless @debug.none? - @context = Context.new @main, @program @context.return_type = @main_ret_type @@ -207,6 +205,8 @@ module Crystal @modules = {"" => @main_module_info} of String => ModuleInfo @types_to_modules = {} of Type => ModuleInfo + set_internal_fun_debug_location(@main, MAIN_NAME, nil) + @alloca_block, @entry_block = new_entry_block_chain "alloca", "entry" @in_lib = false @@ -537,7 +537,7 @@ module Crystal end get_global class_var_global_name(node_exp.var), node_exp.type, node_exp.var when Global - get_global node_exp.name, node_exp.type, node_exp.var + node.raise "BUG: there should be no use of global variables other than $~ and $?" when Path # Make sure the constant is initialized before taking a pointer of it const = node_exp.target_const.not_nil! @@ -574,6 +574,7 @@ module Crystal the_fun = codegen_fun fun_literal_name, node.def, context.type, fun_module_info: @main_module_info, is_fun_literal: true, is_closure: is_closure the_fun = check_main_fun fun_literal_name, the_fun + set_current_debug_location(node) if @debug.line_numbers? 
fun_ptr = bit_cast(the_fun, llvm_context.void_pointer) if is_closure ctx_ptr = bit_cast(context.closure_ptr.not_nil!, llvm_context.void_pointer) @@ -1017,7 +1018,7 @@ module Crystal when InstanceVar instance_var_ptr context.type, target.name, llvm_self_ptr when Global - get_global target.name, target_type, target.var + node.raise "BUG: there should be no use of global variables other than $~ and $?" when ClassVar read_class_var_ptr(target) when Var @@ -1119,6 +1120,7 @@ module Crystal unless thread_local_fun thread_local_fun = in_main do define_main_function(fun_name, [llvm_type(type).pointer.pointer], llvm_context.void) do |func| + set_internal_fun_debug_location(func, fun_name, real_var.location) builder.store get_global_var(name, type, real_var), func.params[0] builder.ret end @@ -1143,15 +1145,7 @@ module Crystal codegen_assign(var, value, node) end when Global - if value = node.value - request_value do - accept value - end - - ptr = get_global var.name, var.type, var.var - assign ptr, var.type, value.type, @last - return false - end + node.raise "BUG: there should be no use of global variables other than $~ and $?" when ClassVar # This is the case of a class var initializer initialize_class_var(var) @@ -1208,18 +1202,13 @@ module Crystal end def visit(node : Global) - read_global node.name.to_s, node.type, node.var + node.raise "BUG: there should be no use of global variables other than $~ and $?" 
end def visit(node : ClassVar) @last = read_class_var(node) end - def read_global(name, type, real_var) - @last = get_global name, type, real_var - @last = to_lhs @last, type - end - def visit(node : InstanceVar) read_instance_var node.type, context.type, node.name, llvm_self_ptr end @@ -1608,6 +1597,8 @@ module Crystal def create_check_proc_is_not_closure_fun(fun_name) in_main do define_main_function(fun_name, [llvm_typer.proc_type], llvm_context.void_pointer) do |func| + set_internal_fun_debug_location(func, fun_name) + param = func.params.first fun_ptr = extract_value param, 0 @@ -1706,6 +1697,16 @@ module Crystal end end + # used for generated internal functions like `~metaclass` and `~match` + def set_internal_fun_debug_location(func, name, location = nil) + return if @debug.none? + location ||= UNKNOWN_LOCATION + emit_fun_debug_metadata(func, name, location) + set_current_debug_location(location) if @debug.line_numbers? + end + + private UNKNOWN_LOCATION = Location.new("??", 0, 0) + def llvm_self(type = context.type) self_var = context.vars["self"]? if self_var @@ -2255,7 +2256,7 @@ module Crystal end def visit(node : ExpandableNode) - raise "BUG: #{node} at #{node.location} should have been expanded" + raise "BUG: #{node} (#{node.class}) at #{node.location} should have been expanded" end def visit(node : ASTNode) diff --git a/src/compiler/crystal/codegen/const.cr b/src/compiler/crystal/codegen/const.cr index 48a053619a60..80b91f2b7170 100644 --- a/src/compiler/crystal/codegen/const.cr +++ b/src/compiler/crystal/codegen/const.cr @@ -72,6 +72,8 @@ class Crystal::CodeGenVisitor end def initialize_simple_const(const) + set_current_debug_location const.locations.try &.first? if @debug.line_numbers? + global = declare_const(const) request_value do accept const.value @@ -100,6 +102,8 @@ class Crystal::CodeGenVisitor # Start with fresh variables context.vars = LLVMVars.new + set_current_debug_location const.locations.try &.first? if @debug.line_numbers? 
+ alloca_vars const.fake_def.try(&.vars), const.fake_def request_value do accept const.value @@ -146,6 +150,8 @@ class Crystal::CodeGenVisitor in_main do define_main_function(fun_name, ([] of LLVM::Type), llvm_context.void, needs_alloca: true) do |func| + set_internal_fun_debug_location(func, fun_name, const.locations.try &.first?) + with_cloned_context do # "self" in a constant is the constant's namespace context.type = const.namespace @@ -228,6 +234,7 @@ class Crystal::CodeGenVisitor def create_read_const_function(fun_name, const) in_main do define_main_function(fun_name, ([] of LLVM::Type), llvm_type(const.value.type).pointer) do |func| + set_internal_fun_debug_location(func, fun_name, const.locations.try &.first?) global = initialize_const(const) ret global end diff --git a/src/compiler/crystal/codegen/debug.cr b/src/compiler/crystal/codegen/debug.cr index fb80af027025..8181f40ea7ee 100644 --- a/src/compiler/crystal/codegen/debug.cr +++ b/src/compiler/crystal/codegen/debug.cr @@ -451,25 +451,25 @@ module Crystal builder.set_current_debug_location(0, 0, nil) end - def emit_main_def_debug_metadata(main_fun, filename) + def emit_fun_debug_metadata(func, fun_name, location, *, debug_types = [] of LibLLVMExt::Metadata, is_optimized = false) + filename = location.try(&.original_filename) || "??" 
+ line_number = location.try(&.line_number) || 0 + file, dir = file_and_dir(filename) scope = di_builder.create_file(file, dir) - fn_metadata = di_builder.create_function(scope, MAIN_NAME, MAIN_NAME, scope, - 0, fun_metadata_type, true, true, 0, LLVM::DIFlags::Zero, false, main_fun) - fun_metadatas[main_fun] = [FunMetadata.new(filename || "??", fn_metadata)] + fn_metadata = di_builder.create_function(scope, fun_name, fun_name, scope, + line_number, fun_metadata_type(debug_types), true, true, + line_number, LLVM::DIFlags::Zero, is_optimized, func) + fun_metadatas[func] = [FunMetadata.new(filename, fn_metadata)] end def emit_def_debug_metadata(target_def) location = target_def.location.try &.expanded_location return unless location - file, dir = file_and_dir(location.filename) - scope = di_builder.create_file(file, dir) - is_optimised = !@debug.variables? - fn_metadata = di_builder.create_function(scope, target_def.name, target_def.name, scope, - location.line_number, fun_metadata_type(context.fun_debug_params), true, true, - location.line_number, LLVM::DIFlags::Zero, is_optimised, context.fun) - fun_metadatas[context.fun] = [FunMetadata.new(location.original_filename || "??", fn_metadata)] + emit_fun_debug_metadata(context.fun, target_def.name, location, + debug_types: context.fun_debug_params, + is_optimized: !@debug.variables?) end def declare_debug_for_function_argument(arg_name, arg_type, arg_no, alloca, location) diff --git a/src/compiler/crystal/codegen/fun.cr b/src/compiler/crystal/codegen/fun.cr index 38609fc93a01..dbcc0964bd05 100644 --- a/src/compiler/crystal/codegen/fun.cr +++ b/src/compiler/crystal/codegen/fun.cr @@ -84,6 +84,7 @@ class Crystal::CodeGenVisitor needs_body = !target_def.is_a?(External) || is_exported_fun if needs_body emit_def_debug_metadata target_def unless @debug.none? + set_current_debug_location target_def if @debug.line_numbers? 
context.fun.add_attribute LLVM::Attribute::UWTable if @program.has_flag?("darwin") @@ -365,7 +366,7 @@ class Crystal::CodeGenVisitor abi_arg_type = abi_info.arg_types[i] if attr = abi_arg_type.attr - context.fun.add_attribute(attr, i + offset + 1) + context.fun.add_attribute(attr, i + offset + 1, abi_arg_type.type) end i += 1 unless abi_arg_type.kind == LLVM::ABI::ArgKind::Ignore @@ -373,7 +374,7 @@ class Crystal::CodeGenVisitor # This is for sret if (attr = abi_info.return_type.attr) && attr == LLVM::Attribute::StructRet - context.fun.add_attribute(attr, 1) + context.fun.add_attribute(attr, 1, abi_info.return_type.type) end args diff --git a/src/compiler/crystal/codegen/link.cr b/src/compiler/crystal/codegen/link.cr index 86f021a4247b..13994aa32905 100644 --- a/src/compiler/crystal/codegen/link.cr +++ b/src/compiler/crystal/codegen/link.cr @@ -81,12 +81,20 @@ module Crystal end class CrystalLibraryPath + def self.default_paths : Array(String) + paths = ENV.fetch("CRYSTAL_LIBRARY_PATH", Crystal::Config.library_path).split(Process::PATH_DELIMITER, remove_empty: true) + + CrystalPath.expand_paths(paths) + + paths + end + def self.default_path : String - ENV.fetch("CRYSTAL_LIBRARY_PATH", Crystal::Config.library_path) + default_paths.join(Process::PATH_DELIMITER) end class_getter paths : Array(String) do - default_path.split(Process::PATH_DELIMITER, remove_empty: true) + default_paths end end diff --git a/src/compiler/crystal/codegen/match.cr b/src/compiler/crystal/codegen/match.cr index 8e0cc646c0e3..67bf74dd8c9b 100644 --- a/src/compiler/crystal/codegen/match.cr +++ b/src/compiler/crystal/codegen/match.cr @@ -47,6 +47,7 @@ class Crystal::CodeGenVisitor private def create_match_fun(name, type) in_main do define_main_function(name, ([llvm_context.int32]), llvm_context.int1) do |func| + set_internal_fun_debug_location(func, name) type_id = func.params.first create_match_fun_body(type, type_id) end diff --git a/src/compiler/crystal/codegen/primitives.cr 
b/src/compiler/crystal/codegen/primitives.cr index c0c79aadf11c..281b1374f813 100644 --- a/src/compiler/crystal/codegen/primitives.cr +++ b/src/compiler/crystal/codegen/primitives.cr @@ -610,7 +610,7 @@ class Crystal::CodeGenVisitor when from_type.normal_rank == to_type.normal_rank # if the normal_rank is the same (eg: UInt64 / Int64) # there is still chance for overflow - if checked + if from_type.kind != to_type.kind && checked overflow = codegen_out_of_range(to_type, from_type, arg) codegen_raise_overflow_cond(overflow) end @@ -916,6 +916,8 @@ class Crystal::CodeGenVisitor in_main do define_main_function(name, ([llvm_context.int32]), llvm_context.int32) do |func| + set_internal_fun_debug_location(func, name) + arg = func.params.first current_block = insert_block diff --git a/src/compiler/crystal/command.cr b/src/compiler/crystal/command.cr index 7f73093bccad..70aab5f366e3 100644 --- a/src/compiler/crystal/command.cr +++ b/src/compiler/crystal/command.cr @@ -190,7 +190,7 @@ class Crystal::Command private def hierarchy config, result = compile_no_codegen "tool hierarchy", hierarchy: true, top_level: true @progress_tracker.stage("Tool (hierarchy)") do - Crystal.print_hierarchy result.program, config.hierarchy_exp, config.output_format + Crystal.print_hierarchy result.program, STDOUT, config.hierarchy_exp, config.output_format end end diff --git a/src/compiler/crystal/crystal_path.cr b/src/compiler/crystal/crystal_path.cr index 5cb2ffae8fd7..e79beabb8ea5 100644 --- a/src/compiler/crystal/crystal_path.cr +++ b/src/compiler/crystal/crystal_path.cr @@ -13,22 +13,61 @@ module Crystal private DEFAULT_LIB_PATH = "lib" - def self.default_path - ENV["CRYSTAL_PATH"]? || begin - if Crystal::Config.path.blank? - DEFAULT_LIB_PATH - elsif Crystal::Config.path.split(Process::PATH_DELIMITER).includes?(DEFAULT_LIB_PATH) - Crystal::Config.path + def self.default_paths : Array(String) + if path = ENV["CRYSTAL_PATH"]? 
+ path_array = path.split(Process::PATH_DELIMITER, remove_empty: true) + elsif path = Crystal::Config.path.presence + path_array = path.split(Process::PATH_DELIMITER, remove_empty: true) + unless path_array.includes?(DEFAULT_LIB_PATH) + path_array.unshift DEFAULT_LIB_PATH + end + else + path_array = [DEFAULT_LIB_PATH] + end + + expand_paths(path_array) + + path_array + end + + def self.default_path : String + default_paths.join(Process::PATH_DELIMITER) + end + + # Expand `$ORIGIN` in the paths to the directory where the compiler binary + # is located (at runtime). + # For install locations like + # `/path/prefix/bin/crystal` for the compiler + # `/path/prefix/share/crystal/src` for the standard library + # the path `$ORIGIN/../share/crystal/src` resolves to + # the standard library location. + # This generic path can be passed into the compiler via CRYSTAL_CONFIG_PATH + # to produce a portable binary that resolves the standard library path + # relative to the compiler location, independent of the absolute path. + def self.expand_paths(paths, origin) + paths.map! do |path| + if (chopped = path.lchop?("$ORIGIN")) && chopped[0].in?(::Path::SEPARATORS) + if origin.nil? + raise "Missing executable path to expand $ORIGIN path" + end + File.join(origin, chopped) else - {DEFAULT_LIB_PATH, Crystal::Config.path}.join(Process::PATH_DELIMITER) + path end end end + def self.expand_paths(paths) + origin = nil + if executable_path = Process.executable_path + origin = File.dirname(executable_path) + end + expand_paths(paths, origin) + end + property entries : Array(String) - def initialize(path = CrystalPath.default_path, codegen_target = Config.host_target) - @entries = path.split(Process::PATH_DELIMITER).reject &.empty? 
+ def initialize(@entries : Array(String) = CrystalPath.default_paths, codegen_target = Config.host_target) add_target_path(codegen_target) end diff --git a/src/compiler/crystal/exception.cr b/src/compiler/crystal/exception.cr index d6e19227236d..f5af99742001 100644 --- a/src/compiler/crystal/exception.cr +++ b/src/compiler/crystal/exception.cr @@ -140,10 +140,13 @@ module Crystal decorator = line_number_decorator(line_number) lstripped_line = line.lstrip space_delta = line.chars.size - lstripped_line.chars.size + # Column number should start at `1`. We're using `0` to track bogus passed + # `column_number`. + final_column_number = (column_number - space_delta).clamp(0..) io << "\n\n" io << colorize(decorator).dim << colorize(lstripped_line.chomp).bold - append_error_indicator(io, decorator.chars.size, column_number - space_delta, size || 0) + append_error_indicator(io, decorator.chars.size, final_column_number, size || 0) end end diff --git a/src/compiler/crystal/program.cr b/src/compiler/crystal/program.cr index 0a52fdcfe2f1..722501add55e 100644 --- a/src/compiler/crystal/program.cr +++ b/src/compiler/crystal/program.cr @@ -26,10 +26,6 @@ module Crystal # All symbols (:foo, :bar) found in the program getter symbols = Set(String).new - # All global variables in the program ($foo, $bar), indexed by their name. - # The names includes the `$` sign. - getter global_vars = {} of String => MetaTypeVar - # Hash that prevents recursive splat expansions. For example: # # ``` @@ -123,7 +119,7 @@ module Crystal property compiler : Compiler? def initialize - super(self, self, "top_level") + super(self, self, "main") # Every crystal program comes with some predefined types that we initialize here, # like Object, Value, Reference, etc. 
diff --git a/src/compiler/crystal/semantic/cleanup_transformer.cr b/src/compiler/crystal/semantic/cleanup_transformer.cr index a7b48cca4fa7..864e09f7e062 100644 --- a/src/compiler/crystal/semantic/cleanup_transformer.cr +++ b/src/compiler/crystal/semantic/cleanup_transformer.cr @@ -265,7 +265,7 @@ module Crystal if target.is_a?(Path) const = target.target_const.not_nil! - return node unless const.used? + return node if !const.used? || const.cleaned_up? unless const.value.type? node.raise "can't infer type of constant #{const} (maybe the constant refers to itself?)" @@ -285,6 +285,7 @@ module Crystal if target.is_a?(Path) const = const.not_nil! const.value = const.value.transform self + const.cleaned_up = true end if node.target == node.value @@ -301,6 +302,18 @@ module Crystal node end + def transform(node : Path) + # Some constants might not have been cleaned up at this point because + # they don't have an explicit `Assign` node. One example is regex + # literals: a constant is created for them, but there's no `Assign` node. + if (const = node.target_const) && const.used? && !const.cleaned_up? 
+ const.value = const.value.transform self + const.cleaned_up = true + end + + node + end + private def void_lib_call?(node) return unless node.is_a?(Call) diff --git a/src/compiler/crystal/semantic/exception.cr b/src/compiler/crystal/semantic/exception.cr index 394be93baf3f..1b367f3ddb97 100644 --- a/src/compiler/crystal/semantic/exception.cr +++ b/src/compiler/crystal/semantic/exception.cr @@ -314,25 +314,6 @@ module Crystal end class Program - def undefined_global_variable(node, similar_name) - common = String.build do |str| - str << "can't infer the type of global variable '#{node.name}'" - if similar_name - str << '\n' - str << colorize(" (did you mean #{similar_name}?)").yellow.bold.to_s - end - end - - msg = String.build do |str| - str << common - str << "\n\n" - str << undefined_variable_message("global", node.name) - str << "\n\n" - str << common - end - node.raise msg - end - def undefined_class_variable(node, owner, similar_name) common = String.build do |str| str << "can't infer the type of class variable '#{node.name}' of #{owner.devirtualize}" diff --git a/src/compiler/crystal/semantic/literal_expander.cr b/src/compiler/crystal/semantic/literal_expander.cr index c3d34946b6f1..57dd8b43e767 100644 --- a/src/compiler/crystal/semantic/literal_expander.cr +++ b/src/compiler/crystal/semantic/literal_expander.cr @@ -253,18 +253,16 @@ module Crystal # # /regex/flags # - # To: + # To declaring a constant with this value (if not already declared): # - # if temp_var = $some_global - # temp_var - # else - # $some_global = Regex.new("regex", Regex::Options.new(flags)) - # end + # ``` + # Regex.new("regex", Regex::Options.new(flags)) + # ``` # - # That is, cache the regex in a global variable. + # and then reading from that constant. + # That is, we cache regex literals to avoid recompiling them all of the time. # # Only do this for regex literals that don't contain interpolation. 
- # # If there's an interpolation, expand to: Regex.new(interpolation, flags) def expand(node : RegexLiteral) node_value = node.value @@ -273,30 +271,19 @@ module Crystal string = node_value.value key = {string, node.options} - index = @regexes.index key - unless index - index = @regexes.size - @regexes << key - end - - global_name = "$Regex:#{index}" - temp_name = @program.new_temp_var_name + index = @regexes.index(key) || @regexes.size + const_name = "$Regex:#{index}" - global_var = MetaTypeVar.new(global_name) - global_var.owner = @program - type = @program.nilable(@program.regex) - global_var.freeze_type = type - global_var.type = type + if index == @regexes.size + @regexes << key - # TODO: need to bind with nil_var for codegen, but shouldn't be needed - global_var.bind_to(@program.nil_var) + const_value = regex_new_call(node, StringLiteral.new(string).at(node)) + const = Const.new(@program, @program, const_name, const_value) - @program.global_vars[global_name] = global_var + @program.types[const_name] = const + end - first_assign = Assign.new(Var.new(temp_name).at(node), Global.new(global_name).at(node)).at(node) - regex = regex_new_call(node, StringLiteral.new(string).at(node)) - second_assign = Assign.new(Global.new(global_name).at(node), regex).at(node) - If.new(first_assign, Var.new(temp_name).at(node), second_assign).at(node) + Path.new(const_name) else regex_new_call(node, node_value) end diff --git a/src/compiler/crystal/semantic/main_visitor.cr b/src/compiler/crystal/semantic/main_visitor.cr index cf8d75a3513e..a20336a59102 100644 --- a/src/compiler/crystal/semantic/main_visitor.cr +++ b/src/compiler/crystal/semantic/main_visitor.cr @@ -87,6 +87,7 @@ module Crystal property last_block_kind : Symbol? property? inside_ensure : Bool = false property? inside_constant = false + property file_module : FileModule? @unreachable = false @is_initialize = false @@ -99,7 +100,6 @@ module Crystal @found_self_in_initialize_call : Array(ASTNode)? 
@used_ivars_in_calls_in_initialize : Hash(String, Array(ASTNode))? @block_context : Block? - @file_module : FileModule? @while_vars : MetaVars? # Type filters for `exp` in `!exp`, used after a `while` @@ -423,22 +423,6 @@ module Crystal class_var = lookup_class_var(var) var.var = class_var class_var.thread_local = true if thread_local - when Global - if @untyped_def - node.raise "declaring the type of a global variable must be done at the class level" - end - - thread_local = check_class_var_annotations - if thread_local - global_var = @program.global_vars[var.name] - global_var.thread_local = true - end - - if value = node.value - type_assign(var, value, node) - node.bind_to(var) - return false - end else raise "Bug: unexpected var type: #{var.class}" end @@ -586,39 +570,12 @@ module Crystal node.bind_to expanded node.expanded = expanded else - visit_global node + node.raise "BUG: there should be no use of global variables other than $~ and $?" end false end - def visit_global(node) - var = lookup_global_variable(node) - - if first_time_accessing_meta_type_var?(var) - var_type = var.type? - if var_type && !var_type.includes_type?(program.nil) - node.raise "global variable '#{node.name}' is read here before it was initialized, rendering it nilable, but its type is #{var_type}" - end - var.bind_to program.nil_var - end - - node.bind_to var - node.var = var - var - end - - def lookup_global_variable(node) - var = program.global_vars[node.name]? 
- undefined_global_variable(node) unless var - var - end - - def undefined_global_variable(node) - similar_name = lookup_similar_global_variable_name(node) - program.undefined_global_variable(node, similar_name) - end - def undefined_instance_variable(owner, node) similar_name = lookup_similar_instance_variable_name(node, owner) program.undefined_instance_variable(node, owner, similar_name) @@ -637,14 +594,6 @@ module Crystal end end - def lookup_similar_global_variable_name(node) - Levenshtein.find(node.name) do |finder| - program.global_vars.each_key do |name| - finder.test(name) - end - end - end - def first_time_accessing_meta_type_var?(var) return false if var.uninitialized? @@ -924,26 +873,7 @@ module Crystal end def type_assign(target : Global, value, node) - thread_local = check_class_var_annotations - - value.accept self - - var = lookup_global_variable(target) - - # If we are assigning to a global inside a method, make it nilable - # if this is the first time we are assigning to it, because - # the method might be called conditionally - if @typed_def && first_time_accessing_meta_type_var?(var) - var.bind_to program.nil_var - end - - var.thread_local = true if thread_local - target.var = var - - target.bind_to var - - node.bind_to value - var.bind_to value + node.raise "BUG: there should be no use of global variables other than $~ and $?" end def type_assign(target : ClassVar, value, node) @@ -1130,6 +1060,7 @@ module Crystal block_visitor.parent = self block_visitor.with_scope = node.scope || with_scope block_visitor.exception_handler_vars = @exception_handler_vars + block_visitor.file_module = @file_module block_scope = @scope block_scope ||= current_type.metaclass unless current_type.is_a?(Program) @@ -2490,7 +2421,7 @@ module Crystal when ClassVar visit_class_var exp when Global - visit_global exp + node.raise "BUG: there should be no use of global variables other than $~ and $?" 
when Path exp.accept self if const = exp.target_const diff --git a/src/compiler/crystal/semantic/match.cr b/src/compiler/crystal/semantic/match.cr index 3779ed0fd5f6..329802a5ff6d 100644 --- a/src/compiler/crystal/semantic/match.cr +++ b/src/compiler/crystal/semantic/match.cr @@ -46,12 +46,10 @@ module Crystal # Any instance variables associated with the method instantiation getter free_vars : Hash(String, TypeVar)? - getter? strict : Bool - # Def free variables, unbound (`def (X, Y) ...`) property def_free_vars : Array(String)? - def initialize(@instantiated_type, @defining_type, @free_vars = nil, @strict = false, @def_free_vars = nil) + def initialize(@instantiated_type, @defining_type, @free_vars = nil, @def_free_vars = nil) end def get_free_var(name) @@ -93,7 +91,7 @@ module Crystal end def clone - MatchContext.new(@instantiated_type, @defining_type, @free_vars.dup, @strict, @def_free_vars.dup) + MatchContext.new(@instantiated_type, @defining_type, @free_vars.dup, @def_free_vars.dup) end end diff --git a/src/compiler/crystal/semantic/restrictions.cr b/src/compiler/crystal/semantic/restrictions.cr index 08bb3fe7abd1..6bcf930c23b3 100644 --- a/src/compiler/crystal/semantic/restrictions.cr +++ b/src/compiler/crystal/semantic/restrictions.cr @@ -33,39 +33,39 @@ require "../types" module Crystal class ASTNode - def restriction_of?(other : Underscore, owner, strict = false) + def restriction_of?(other : Underscore, owner) true end - def restriction_of?(other : ASTNode, owner, strict = false) + def restriction_of?(other : ASTNode, owner) self == other end - def restriction_of?(other : Type, owner, strict = false) + def restriction_of?(other : Type, owner) false end - def restriction_of?(other, owner, strict = false) + def restriction_of?(other, owner) raise "BUG: called #{self}.restriction_of?(#{other})" end end class Self - def restriction_of?(type : Type, owner, strict = false) - owner.restriction_of?(type, owner, strict) + def restriction_of?(type : Type, owner) + 
owner.restriction_of?(type, owner) end - def restriction_of?(type : Self, owner, strict = false) + def restriction_of?(type : Self, owner) true end - def restriction_of?(type : ASTNode, owner, strict = false) + def restriction_of?(type : ASTNode, owner) false end end struct DefWithMetadata - def restriction_of?(other : DefWithMetadata, owner, strict = false) + def restriction_of?(other : DefWithMetadata, owner) # This is how multiple defs are sorted by 'restrictions' (?) # If one yields and the other doesn't, none is stricter than the other @@ -118,7 +118,7 @@ module Crystal # If this is a splat arg and the other not, this is not stricter than the other return false if index == self.def.splat_index - return false unless self_type.restriction_of?(other_type, owner, strict) + return false unless self_type.restriction_of?(other_type, owner) end end @@ -131,7 +131,7 @@ module Crystal if self_restriction && other_restriction # If both splat have restrictions, check which one is stricter - return false unless self_restriction.restriction_of?(other_restriction, owner, strict) + return false unless self_restriction.restriction_of?(other_restriction, owner) elsif self_restriction # If only self has a restriction, it's stricter than the other return true @@ -165,7 +165,7 @@ module Crystal return false if self_restriction == nil && other_restriction != nil if self_restriction && other_restriction - return false unless self_restriction.restriction_of?(other_restriction, owner, strict) + return false unless self_restriction.restriction_of?(other_restriction, owner) end end @@ -182,7 +182,7 @@ module Crystal # If both double splat have restrictions, check which one is stricter if self_double_splat_restriction && other_double_splat_restriction - return false unless self_double_splat_restriction.restriction_of?(other_double_splat_restriction, owner, strict) + return false unless self_double_splat_restriction.restriction_of?(other_double_splat_restriction, owner) elsif 
self_double_splat_restriction # If only self has a restriction, it's stricter than the other return true @@ -241,14 +241,14 @@ module Crystal end class Path - def restriction_of?(other : Path, owner, strict = false) + def restriction_of?(other : Path, owner) return true if self == other self_type = owner.lookup_path(self) if self_type other_type = owner.lookup_path(other) if other_type - return self_type.restriction_of?(other_type, owner, strict) + return self_type.restriction_of?(other_type, owner) else return true end @@ -257,17 +257,17 @@ module Crystal false end - def restriction_of?(other : Union, owner, strict = false) + def restriction_of?(other : Union, owner) # `true` if this type is a restriction of any type in the union - other.types.any? { |o| self.restriction_of?(o, owner, strict) } + other.types.any? { |o| self.restriction_of?(o, owner) } end - def restriction_of?(other : Generic, owner, strict = false) + def restriction_of?(other : Generic, owner) self_type = owner.lookup_path(self) if self_type other_type = owner.lookup_type?(other) if other_type - return self_type.restriction_of?(other_type, owner, strict) + return self_type.restriction_of?(other_type, owner) end end @@ -281,23 +281,23 @@ module Crystal false end - def restriction_of?(other, owner, strict = false) + def restriction_of?(other, owner) false end end class Union - def restriction_of?(other : Path, owner, strict = false) + def restriction_of?(other : Path, owner) # For a union to be considered before a path, # all types in the union must be considered before # that path. # For example when using all subtypes of a parent type. - types.all? &.restriction_of?(other, owner, strict) + types.all? 
&.restriction_of?(other, owner) end end class Generic - def restriction_of?(other : Path, owner, strict = false) + def restriction_of?(other : Path, owner) # ``` # def foo(param : Array(T)) forall T # end @@ -312,7 +312,7 @@ module Crystal if self_type other_type = owner.lookup_path(other) if other_type - return self_type.restriction_of?(other_type, owner, strict) + return self_type.restriction_of?(other_type, owner) end end @@ -332,19 +332,19 @@ module Crystal true end - def restriction_of?(other : Generic, owner, strict = false) + def restriction_of?(other : Generic, owner) return true if self == other return false unless name == other.name && type_vars.size == other.type_vars.size # Special case: NamedTuple against NamedTuple if (self_type = owner.lookup_type?(self)).is_a?(NamedTupleInstanceType) if (other_type = owner.lookup_type?(other)).is_a?(NamedTupleInstanceType) - return self_type.restriction_of?(other_type, owner, strict) + return self_type.restriction_of?(other_type, owner) end end type_vars.zip(other.type_vars) do |type_var, other_type_var| - return false unless type_var.restriction_of?(other_type_var, owner, strict) + return false unless type_var.restriction_of?(other_type_var, owner) end true @@ -352,7 +352,7 @@ module Crystal end class GenericClassType - def restriction_of?(other : GenericClassInstanceType, owner, strict = false) + def restriction_of?(other : GenericClassInstanceType, owner) # ``` # def foo(param : Array) # end @@ -374,7 +374,7 @@ module Crystal end class GenericClassInstanceType - def restriction_of?(other : GenericClassType, owner, strict = false) + def restriction_of?(other : GenericClassType, owner) # ``` # def foo(param : Array(Int32)) # end @@ -392,11 +392,11 @@ module Crystal end class Metaclass - def restriction_of?(other : Metaclass, owner, strict = false) - name.restriction_of?(other.name, owner, strict) + def restriction_of?(other : Metaclass, owner) + name.restriction_of?(other.name, owner) end - def 
restriction_of?(other : Path, owner, strict = false) + def restriction_of?(other : Path, owner) other_type = owner.lookup_type(other) # Special case: when comparing Foo.class to Class, Foo.class has precedence @@ -423,7 +423,7 @@ module Crystal return self end - if parents.try &.any? &.restriction_of?(other, context.instantiated_type, context.strict?) + if parents.try &.any? &.restriction_of?(other, context.instantiated_type) return self end @@ -467,7 +467,7 @@ module Crystal def restrict(other : GenericClassType, context) parents.try &.each do |parent| if parent.module? - return self if parent.restriction_of?(other, context.instantiated_type, context.strict?) + return self if parent.restriction_of?(other, context.instantiated_type) else restricted = parent.restrict other, context return self if restricted @@ -611,31 +611,31 @@ module Crystal raise "BUG: unsupported restriction: #{self} vs. #{other}" end - def restriction_of?(other : UnionType, owner, strict = false) - other.union_types.any? { |subtype| restriction_of?(subtype, owner, strict) } + def restriction_of?(other : UnionType, owner) + other.union_types.any? { |subtype| restriction_of?(subtype, owner) } end - def restriction_of?(other : VirtualType, owner, strict = false) + def restriction_of?(other : VirtualType, owner) implements? other.base_type end - def restriction_of?(other : Type, owner, strict = false) + def restriction_of?(other : Type, owner) if self == other return true end - parents.try &.any? &.restriction_of?(other, owner, strict) + parents.try &.any? 
&.restriction_of?(other, owner) end - def restriction_of?(other : AliasType, owner, strict = false) + def restriction_of?(other : AliasType, owner) if self == other true else - restriction_of?(other.remove_alias, owner, strict) + restriction_of?(other.remove_alias, owner) end end - def restriction_of?(other : ASTNode, owner, strict = false) + def restriction_of?(other : ASTNode, owner) raise "BUG: called #{self}.restriction_of?(#{other})" end @@ -645,8 +645,8 @@ module Crystal end class UnionType - def restriction_of?(type, owner, strict = false) - self == type || union_types.all? &.restriction_of?(type, owner, strict) + def restriction_of?(type, owner) + self == type || union_types.all? &.restriction_of?(type, owner) end def restrict(other : Union, context) @@ -719,23 +719,22 @@ module Crystal end class GenericInstanceType - def restriction_of?(other : GenericType, owner, strict = false) + def restriction_of?(other : GenericType, owner) return true if generic_type == other super end - def restriction_of?(other : GenericInstanceType, owner, strict = false) + def restriction_of?(other : GenericInstanceType, owner) return super unless generic_type == other.generic_type type_vars.each do |name, type_var| other_type_var = other.type_vars[name] if type_var.is_a?(Var) && other_type_var.is_a?(Var) - restricted = if strict - type_var.type.devirtualize == other_type_var.type.devirtualize - else - type_var.type.implements?(other_type_var.type) - end - return nil unless restricted + # This overload can be called when the restriction node has a type due + # to e.g. AbstractDefChecker; generic instances shall behave like AST + # nodes when def restrictions are considered, i.e. all generic type + # variables are covariant. + return nil unless type_var.type.implements?(other_type_var.type) else return nil unless type_var == other_type_var end @@ -749,7 +748,7 @@ module Crystal parents.try &.each do |parent| if parent.module? 
- return self if parent.restriction_of?(other, context.instantiated_type, context.strict?) + return self if parent.restriction_of?(other, context.instantiated_type) else restricted = parent.restrict other, context return self if restricted @@ -897,9 +896,7 @@ module Crystal end if type_var.is_a?(ASTNode) - type_var.restriction_of?(other_type_var, context.instantiated_type, context.strict?) - elsif context.strict? - type_var == other_type_var + type_var.restriction_of?(other_type_var, context.instantiated_type) else # To prevent infinite recursion, it checks equality between # `type_var` and `other_type_var` directly before try to restrict @@ -910,7 +907,7 @@ module Crystal end class TupleInstanceType - def restriction_of?(other : TupleInstanceType, owner, strict = false) + def restriction_of?(other : TupleInstanceType, owner) return true if self == other || self.implements?(other) false @@ -970,7 +967,7 @@ module Crystal end class NamedTupleInstanceType - def restriction_of?(other : NamedTupleInstanceType, owner, strict = false) + def restriction_of?(other : NamedTupleInstanceType, owner) return true if self == other || self.implements?(other) false @@ -1009,7 +1006,7 @@ module Crystal end class VirtualType - def restriction_of?(other : Type, owner, strict = false) + def restriction_of?(other : Type, owner) other = other.base_type if other.is_a?(VirtualType) base_type.implements?(other) || other.implements?(base_type) end @@ -1087,10 +1084,10 @@ module Crystal end class AliasType - def restriction_of?(other, owner, strict = false) + def restriction_of?(other, owner) return true if self == other - remove_alias.restriction_of?(other, owner, strict) + remove_alias.restriction_of?(other, owner) end def restrict(other : Path, context) @@ -1178,8 +1175,8 @@ module Crystal restricted ? 
self : nil end - def restriction_of?(other : VirtualMetaclassType, owner, strict = false) - restriction_of?(other.base_type.metaclass, owner, strict) + def restriction_of?(other : VirtualMetaclassType, owner) + restriction_of?(other.base_type.metaclass, owner) end end diff --git a/src/compiler/crystal/semantic/type_declaration_processor.cr b/src/compiler/crystal/semantic/type_declaration_processor.cr index 98ac0b43a573..b5770704238e 100644 --- a/src/compiler/crystal/semantic/type_declaration_processor.cr +++ b/src/compiler/crystal/semantic/type_declaration_processor.cr @@ -118,6 +118,10 @@ struct Crystal::TypeDeclarationProcessor # Types whose initialize methods are all macro defs @has_macro_def = Set(Type).new + # Types that are not extended by any other types, used to speed up detection + # of instance vars in extended modules + @has_no_extenders = Set(Type).new + @type_decl_visitor = TypeDeclarationVisitor.new(@program, @explicit_instance_vars) @type_guess_visitor = TypeGuessVisitor.new(@program, @explicit_instance_vars, @@ -254,7 +258,11 @@ struct Crystal::TypeDeclarationProcessor # set from uninstantiated generic types return if owner.is_a?(GenericInstanceType) - if owner.metaclass? + if owner.is_a?(NonGenericModuleType) || owner.is_a?(GenericModuleType) + if extender = find_extending_type(owner) + raise TypeException.new("can't declare instance variables in #{owner} because #{extender} extends it", type_decl.location) + end + elsif owner.metaclass? raise TypeException.new("can't declare instance variables in #{owner}", type_decl.location) end @@ -299,6 +307,26 @@ struct Crystal::TypeDeclarationProcessor end end + private def find_extending_type(mod) + return nil if @has_no_extenders.includes?(mod) + + mod.raw_including_types.try &.each do |includer| + case includer + when .metaclass? 
+ return includer.instance_type + when NonGenericModuleType + type = find_extending_type(includer) + return type if type + when GenericModuleInstanceType + type = find_extending_type(includer.generic_type.as(GenericModuleType)) + return type if type + end + end + + @has_no_extenders << mod + nil + end + private def check_non_nilable_for_generic_module(owner, name, type_decl) case owner when GenericModuleType @@ -344,6 +372,14 @@ struct Crystal::TypeDeclarationProcessor # set from uninstantiated generic types return if owner.is_a?(GenericInstanceType) + if owner.is_a?(NonGenericModuleType) || owner.is_a?(GenericModuleType) + if extender = find_extending_type(owner) + raise TypeException.new("can't declare instance variables in #{owner} because #{extender} extends it", type_info.location) + end + elsif owner.metaclass? + raise TypeException.new("can't declare instance variables in #{owner}", type_info.location) + end + # If a superclass already defines this variable we ignore # the guessed type information for subclasses supervar = owner.lookup_instance_var?(name) diff --git a/src/compiler/crystal/semantic/type_intersect.cr b/src/compiler/crystal/semantic/type_intersect.cr new file mode 100644 index 000000000000..c05ba59bf08f --- /dev/null +++ b/src/compiler/crystal/semantic/type_intersect.cr @@ -0,0 +1,268 @@ +require "../program" + +module Crystal + class Type + # Given two types T and U, returns a common descendent V such that V <= T + # and V <= U. This is the same as: + # + # ``` + # typeof(begin + # x = uninitialized T + # x.is_a?(U) ? x : raise "" + # end) + # ``` + # + # except that `nil` is returned if the above produces `NoReturn`. + def self.common_descendent(type1 : Type, type2 : Type) + common_descendent_base(type1, type2) + end + + def self.common_descendent(type1 : TupleInstanceType, type2 : TupleInstanceType) + type1.implements?(type2) ? 
type1 : nil + end + + def self.common_descendent(type1 : NamedTupleInstanceType, type2 : NamedTupleInstanceType) + type1.implements?(type2) ? type1 : nil + end + + def self.common_descendent(type1 : ProcInstanceType, type2 : ProcInstanceType) + type1.compatible_with?(type2) ? type2 : nil + end + + def self.common_descendent(type1 : NonGenericModuleType | GenericModuleInstanceType, type2 : AliasType) + common_descendent(type1, type2.remove_alias) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : NonGenericModuleType | GenericModuleInstanceType, type2 : UnionType) + common_descendent_union(type1, type2) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : NonGenericModuleType | GenericModuleInstanceType, type2 : VirtualType) + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : NonGenericModuleType | GenericModuleInstanceType, type2 : GenericClassType) + common_descendent_instance_and_generic(type1, type2) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : GenericModuleInstanceType, type2 : GenericModuleInstanceType) + common_descendent_generic_instances(type1, type2) || + common_descendent_base(type1, type2) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : GenericModuleInstanceType, type2 : GenericModuleType) + return type1 if type1.generic_type == type2 + + common_descendent_instance_and_generic(type1, type2) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : NonGenericModuleType | GenericModuleInstanceType, type2 : Type) + common_descendent_base(type1, type2) || + common_descendent_including_types(type1, type2) + end + + def self.common_descendent(type1 : GenericClassInstanceType, type2 : GenericClassType) + return type1 if type1.generic_type == type2 + + 
common_descendent_instance_and_generic(type1, type2) + end + + def self.common_descendent(type1 : GenericInstanceType, type2 : GenericInstanceType) + common_descendent_generic_instances(type1, type2) || + common_descendent_base(type1, type2) + end + + def self.common_descendent(type1 : MetaclassType, type2 : VirtualMetaclassType) + # A module class can't be restricted into a class + return nil if type1.instance_type.module? + + restricted = common_descendent(type1.instance_type, type2.instance_type.base_type) + restricted ? type1 : nil + end + + def self.common_descendent(type1 : GenericClassInstanceMetaclassType | GenericModuleInstanceMetaclassType, type2 : MetaclassType) + return type1 if type1.instance_type.generic_type.metaclass == type2 + + restricted = common_descendent(type1.instance_type, type2.instance_type) + restricted ? type1 : nil + end + + def self.common_descendent(type1 : UnionType, type2 : Type) + types = type1.union_types.compact_map do |union_type| + common_descendent(union_type, type2) + end + type1.program.type_merge_union_of(types) + end + + def self.common_descendent(type1 : AliasType, type2 : AliasType) + return type1 if type1 == type2 + + if !type1.simple? && !type2.simple? 
+ return nil + end + + common_descendent(type1.remove_alias, type2) + end + + def self.common_descendent(type1 : AliasType, type2 : Type) + common_descendent(type1.remove_alias, type2) + end + + def self.common_descendent(type1 : TypeDefType, type2 : UnionType) + common_descendent_union(type1, type2) + end + + def self.common_descendent(type1 : TypeDefType, type2 : AliasType) + type2 = type2.remove_alias + return type1 if type1 == type2 + common_descendent(type1, type2) + end + + def self.common_descendent(type1 : TypeDefType, type2 : Type) + return type1 if type1 == type2 + + restricted = common_descendent(type1.typedef, type2) + if restricted == type1.typedef + return type1 + elsif restricted.is_a?(UnionType) + type1.program.type_merge(restricted.union_types.map { |t| t == type1.typedef ? type1 : t }) + else + restricted + end + end + + def self.common_descendent(type1 : VirtualType, type2 : VirtualType) + return type1 if type1 == type2 + + base_type1 = type1.base_type + base_type2 = type2.base_type + (common_descendent(base_type1, base_type2) || common_descendent(base_type2, base_type1)).try &.virtual_type + end + + def self.common_descendent(type1 : VirtualType, type2 : AliasType) + common_descendent(type1, type2.remove_alias) + end + + def self.common_descendent(type1 : VirtualType, type2 : UnionType) + types = type2.union_types.compact_map do |t| + common_descendent(type1, t) + end + type1.program.type_merge types + end + + def self.common_descendent(type1 : VirtualType, type2 : Type) + base_type = type1.base_type + + if type2.implements?(base_type) + type2.virtual_type + elsif base_type.implements?(type2) + type1 + elsif type2.module? + types = base_type.subclasses.compact_map do |subclass| + common_descendent(subclass.virtual_type, type2) + end + type1.program.type_merge_union_of types + elsif base_type.is_a?(GenericInstanceType) && type2.is_a?(GenericType) + # Consider the case of Foo(Int32) vs. 
Bar(T), with Bar(T) < Foo(T): + # we want to return Bar(Int32), so we search in Bar's generic instantiations + types = type2.instantiated_types.compact_map do |instance| + next if instance.unbound? || instance.abstract? + instance.virtual_type if instance.implements?(base_type) + end + type1.program.type_merge_union_of types + else + nil + end + end + + def self.common_descendent(type1 : NilType, type2 : VoidType) + # Allow Nil to match Void (useful for `Pointer(Void)#value=`) + type1 + end + + def self.common_descendent(type1 : GenericClassType, type2 : GenericClassType) + return type1 if type1 == type2 + + common_descendent_instance_and_generic(type1, type2) + end + + def self.common_descendent(type1 : Type, type2 : AliasType) + return type1 if type1 == type2 + + common_descendent(type1, type2.remove_alias) + end + + def self.common_descendent(type1 : Type, type2 : UnionType) + common_descendent_union(type1, type2) + end + + def self.common_descendent(type1 : Type, type2 : VirtualType) + type1.implements?(type2.base_type) ? type1 : nil + end + + def self.common_descendent(type1 : Type, type2 : GenericClassType) + common_descendent_instance_and_generic(type1, type2) + end + + private def self.common_descendent_base(type1, type2) + if type1 == type2 + return type1 + end + + if type1.parents.try &.any? &.implements?(type2) + return type1 + end + end + + private def self.common_descendent_union(type, union) + restricted = nil + + union.union_types.each do |union_type| + # Apply the restriction logic on each union type, even if we already + # have a match, so that we can detect ambiguous calls between of + # literal types against aliases that resolve to union types. + restriction = common_descendent(type, union_type) + restricted ||= restriction + end + + restricted ? 
type : nil + end + + private def self.common_descendent_including_types(mod, type) + mod.including_types.try { |t| common_descendent(t, type) } + end + + private def self.common_descendent_instance_and_generic(instance, generic) + instance.parents.try &.each do |parent| + if parent.module? + return instance if parent.implements?(generic) + else + restricted = common_descendent(parent, generic) + return instance if restricted + end + end + end + + private def self.common_descendent_generic_instances(type1, type2) + return nil unless type1.generic_type == type2.generic_type + + type1.type_vars.each do |name, type_var1| + type_var2 = type2.type_vars[name] + if type_var1.is_a?(Var) && type_var2.is_a?(Var) + # type vars are invariant except for Tuple and NamedTuple and those have + # separate logic + return nil unless type_var1.type.devirtualize == type_var2.type.devirtualize + else + return nil unless type_var1 == type_var2 + end + end + + type1 + end + end +end diff --git a/src/compiler/crystal/syntax/lexer.cr b/src/compiler/crystal/syntax/lexer.cr index 570a260e9806..bc2614a26ad9 100644 --- a/src/compiler/crystal/syntax/lexer.cr +++ b/src/compiler/crystal/syntax/lexer.cr @@ -55,6 +55,7 @@ module Crystal def initialize(string, string_pool : StringPool? = nil) @reader = Char::Reader.new(string) @token = Token.new + @temp_token = Token.new @line_number = 1 @column_number = 1 @filename = "" @@ -636,6 +637,8 @@ module Crystal when '~' next_char :"~" when '.' + line = @line_number + column = @column_number case next_char when '.' case next_char @@ -644,6 +647,8 @@ module Crystal else @token.type = :".." end + when .ascii_number? + raise ".1 style number literal is not supported, put 0 before dot", line, column else @token.type = :"." 
end @@ -2411,7 +2416,10 @@ module Crystal when '\\' char = next_char if delimiter_state - if char == delimiter_state.end + case char + when delimiter_state.end + char = next_char + when '\\' char = next_char end whitespace = false @@ -2524,14 +2532,23 @@ module Crystal @token end - def lookahead - old_pos = @reader.pos - old_line_number, old_column_number = @line_number, @column_number + def lookahead(preserve_token_on_fail = false) + old_pos, old_line, old_column = current_pos, @line_number, @column_number + @temp_token.copy_from(@token) if preserve_token_on_fail result = yield unless result - @reader.pos = old_pos - @line_number, @column_number = old_line_number, old_column_number + self.current_pos, @line_number, @column_number = old_pos, old_line, old_column + @token.copy_from(@temp_token) if preserve_token_on_fail + end + result + end + + def peek_ahead + result = uninitialized typeof(yield) + lookahead(preserve_token_on_fail: true) do + result = yield + nil end result end diff --git a/src/compiler/crystal/syntax/parser.cr b/src/compiler/crystal/syntax/parser.cr index 2eccad8ae456..f63b28f993bd 100644 --- a/src/compiler/crystal/syntax/parser.cr +++ b/src/compiler/crystal/syntax/parser.cr @@ -26,7 +26,6 @@ module Crystal def initialize(str, string_pool : StringPool? = nil, @def_vars = [Set(String).new]) super(str, string_pool) - @temp_token = Token.new @unclosed_stack = [] of Unclosed @calls_super = false @calls_initialize = false @@ -624,12 +623,7 @@ module Crystal end # Allow '.' after newline for chaining calls - old_pos, old_line, old_column = current_pos, @line_number, @column_number - @temp_token.copy_from @token - next_token_skip_space_or_newline - unless @token.type == :"." - self.current_pos, @line_number, @column_number = old_pos, old_line, old_column - @token.copy_from @temp_token + unless lookahead(preserve_token_on_fail: true) { next_token_skip_space_or_newline; @token.type == :"." } break end when :"." 
@@ -965,21 +959,10 @@ module Crystal location = @token.location var = Var.new(@token.to_s).at(location) - old_pos, old_line, old_column = current_pos, @line_number, @column_number - @temp_token.copy_from(@token) - - next_token_skip_space - - if @token.type == :"=" - @token.copy_from(@temp_token) - self.current_pos, @line_number, @column_number = old_pos, old_line, old_column - + if peek_ahead { next_token_skip_space; @token.type == :"=" } push_var var node_and_next_token var else - @token.copy_from(@temp_token) - self.current_pos, @line_number, @column_number = old_pos, old_line, old_column - node_and_next_token Global.new(var.name).at(location) end when :GLOBAL_MATCH_DATA_INDEX @@ -5085,20 +5068,18 @@ module Crystal # Looks ahead next tokens to check whether they indicate type. def type_start?(*, consume_newlines) - old_pos, old_line, old_column = current_pos, @line_number, @column_number - @temp_token.copy_from(@token) + peek_ahead do + begin + if consume_newlines + next_token_skip_space_or_newline + else + next_token_skip_space + end - begin - if consume_newlines - next_token_skip_space_or_newline - else - next_token_skip_space + type_start? + rescue + false end - - type_start? - ensure - @token.copy_from(@temp_token) - self.current_pos, @line_number, @column_number = old_pos, old_line, old_column end end @@ -5368,7 +5349,7 @@ module Crystal args = call_args.args if call_args if args && !args.empty? 
- if args.size == 1 + if args.size == 1 && !args.first.is_a?(Splat) node = klass.new(args.first) else tuple = TupleLiteral.new(args).at(args.last) diff --git a/src/compiler/crystal/tools/doc/generator.cr b/src/compiler/crystal/tools/doc/generator.cr index 6297e23665e5..b5c2b4eb2dcc 100644 --- a/src/compiler/crystal/tools/doc/generator.cr +++ b/src/compiler/crystal/tools/doc/generator.cr @@ -1,3 +1,5 @@ +require "../../../../../lib/markd/src/markd" + class Crystal::Doc::Generator getter program : Program @@ -310,12 +312,17 @@ class Crystal::Doc::Generator def doc(context, string) string = isolate_flag_lines string string += build_flag_lines_from_annotations context - markdown = String.build do |io| - Markdown.parse string, Markdown::DocRenderer.new(context, io) - end + markdown = render_markdown(context, string) generate_flags markdown end + private def render_markdown(context, source) + options = ::Markd::Options.new + document = ::Markd::Parser.parse(source, options) + renderer = MarkdDocRenderer.new(context, options) + renderer.render(document).chomp + end + def fetch_doc_lines(doc : String) : String doc.gsub /\n+/ { |match| match.size == 1 ? 
" " : "\n" } end diff --git a/src/compiler/crystal/tools/doc/markdown/doc_renderer.cr b/src/compiler/crystal/tools/doc/markd_doc_renderer.cr similarity index 51% rename from src/compiler/crystal/tools/doc/markdown/doc_renderer.cr rename to src/compiler/crystal/tools/doc/markd_doc_renderer.cr index 2fb2d7e62009..76c5ba33aab6 100644 --- a/src/compiler/crystal/tools/doc/markdown/doc_renderer.cr +++ b/src/compiler/crystal/tools/doc/markd_doc_renderer.cr @@ -1,37 +1,57 @@ -require "./*" - -class Crystal::Doc::Markdown::DocRenderer < Crystal::Doc::Markdown::HTMLRenderer - def self.new(obj : Constant | Macro | Method, io) - new obj.type, io +class Crystal::Doc::MarkdDocRenderer < Markd::HTMLRenderer + def initialize(@type : Crystal::Doc::Type, options) + super(options) end - @type : Crystal::Doc::Type + def self.new(obj : Constant | Macro | Method, options) + new obj.type, options + end - def initialize(@type : Crystal::Doc::Type, io) - super(io) + def heading(node : Markd::Node, entering : Bool) + tag_name = HEADINGS[node.data["level"].as(Int32) - 1] + if entering + anchor = collect_text(node) + .underscore # Underscore the string + .gsub(/[^\w\d\s\-.~]/, "") # Delete unsafe URL characters + .strip # Strip leading/trailing whitespace + .gsub(/[\s_-]+/, '-') # Replace `_` and leftover whitespace with `-` + + tag(tag_name, attrs(node)) + literal Crystal::Doc.anchor_link(anchor) + else + tag(tag_name, end_tag: true) + newline + end + end - @inside_inline_code = false - @code_buffer = IO::Memory.new - @inside_code = false - @inside_link = false + def collect_text(main) + String.build do |io| + walker = main.walker + while item = walker.next + node, entering = item + if entering && (text = node.text) + io << text + end + end + end end - # For inline code we search if there's a method with that name in - # the current type (it's usual to refer to these as `method`). - # - # If there is a match, we output the link without the ... - # tag (looks better). 
If there isn't a match, we want to preserve the code tag. - def begin_inline_code - super - @inside_inline_code = true - @code_buffer.clear + def code(node : Markd::Node, entering : Bool) + tag("code") do + if in_link?(node) + output(node.text) + else + literal(expand_code_links(node.text)) + end + end end - def end_inline_code - @inside_inline_code = false + def in_link?(node) + parent = node.parent? + return false unless parent + return true if parent.type.link? - @io << expand_code_links(@code_buffer.to_s) - super + in_link?(parent) end def expand_code_links(text : String) : String @@ -87,71 +107,50 @@ class Crystal::Doc::Markdown::DocRenderer < Crystal::Doc::Markdown::HTMLRenderer end end - def begin_code(language = nil) - if language.nil? || language == "cr" - language = "crystal" - end - - super - - if language == "crystal" - @inside_code = true - @code_buffer.clear - end - end - - def end_code - if @inside_code - text = Highlighter.highlight(@code_buffer.to_s) - @io << text - end - - @inside_code = false - - super - end - - def begin_link(url) - @io << %() + def code_block(node : Markd::Node, entering : Bool) + languages = node.fence_language ? node.fence_language.split : nil + code_tag_attrs = attrs(node) + pre_tag_attrs = if @options.prettyprint + {"class" => "prettyprint"} + else + nil + end - @inside_link = true - end - - def end_link - super - @inside_link = false - end + language = languages.try &.first?.try &.strip + language = nil if language.try &.empty? - def text(text) - if @inside_code - @code_buffer << text - return + if language.nil? 
|| language == "cr" + language = "crystal" end - if @inside_link - super - return + if language + code_tag_attrs ||= {} of String => String + code_tag_attrs["class"] = "language-#{escape(language)}" end - if @inside_inline_code - @code_buffer << text - return + newline + tag("pre", pre_tag_attrs) do + tag("code", code_tag_attrs) do + code = node.text.chomp + if language == "crystal" + literal(Highlighter.highlight code) + else + output(code) + end + end end - - super(text) + newline end - def type_link(type, text) + private def type_link(type, text) %(#{text}) end - def method_link(method, text) + private def method_link(method, text) %(#{text}) end - def lookup_method(type, name, args, kind = nil) + private def lookup_method(type, name, args, kind = nil) case args when "" args_count = nil diff --git a/src/compiler/crystal/tools/doc/markdown/html_renderer.cr b/src/compiler/crystal/tools/doc/markdown/html_renderer.cr deleted file mode 100644 index 3e6880bcc597..000000000000 --- a/src/compiler/crystal/tools/doc/markdown/html_renderer.cr +++ /dev/null @@ -1,121 +0,0 @@ -require "./renderer" - -class Crystal::Doc::Markdown::HTMLRenderer - include Renderer - - def initialize(@io : IO) - end - - def begin_paragraph - @io << "

" - end - - def end_paragraph - @io << "

" - end - - def begin_italic - @io << "" - end - - def end_italic - @io << "" - end - - def begin_bold - @io << "" - end - - def end_bold - @io << "" - end - - def begin_header(level : Int32, anchor : String) : Nil - @io << "' - @io << Crystal::Doc.anchor_link(anchor) - end - - def end_header(level) - @io << "' - end - - def begin_inline_code - @io << "" - end - - def end_inline_code - @io << "" - end - - def begin_code(language) - if language.nil? - @io << "
"
-    else
-      @io << %(
)
-    end
-  end
-
-  def end_code
-    @io << "
" - end - - def begin_quote - @io << "
" - end - - def end_quote - @io << "
" - end - - def begin_unordered_list - @io << "
    " - end - - def end_unordered_list - @io << "
" - end - - def begin_ordered_list - @io << "
    " - end - - def end_ordered_list - @io << "
" - end - - def begin_list_item - @io << "
  • " - end - - def end_list_item - @io << "
  • " - end - - def begin_link(url) - @io << %() - end - - def end_link - @io << "" - end - - def image(url, alt) - @io << %()
-    @io << alt
-    @io << %() - end - - def text(text) - @io << text.gsub('<', "<") - end - - def horizontal_rule - @io << "
    " - end -end diff --git a/src/compiler/crystal/tools/doc/markdown/markdown.cr b/src/compiler/crystal/tools/doc/markdown/markdown.cr deleted file mode 100644 index 90c595010168..000000000000 --- a/src/compiler/crystal/tools/doc/markdown/markdown.cr +++ /dev/null @@ -1,36 +0,0 @@ -# Basic implementation of Markdown for the `crystal doc` tool. -# -# It lacks many features and it has some bugs too. Eventually we should replace -# it with something more feature-complete (like https://github.com/icyleaf/markd) -# but that means the compiler will start depending on external shards. Otherwise -# we should extract the doc as a separate tool/binary. -# We don't expose this library in the standard library because it's probable -# that we will never make it feature complete. -# -# Usage: -# -# ``` -# require "compiler/crystal/tools/doc/markdown" -# -# text = "## This is title \n This is a [link](https://crystal-lang.org)" -# -# Crystal::Doc::Markdown.to_html(text) -# # =>

    This is title

    -# # =>

    This is a link

    -# ``` -module Crystal::Doc::Markdown - def self.parse(text, renderer) - parser = Parser.new(text, renderer) - parser.parse - end - - def self.to_html(text) : String - String.build do |io| - parse text, Markdown::HTMLRenderer.new(io) - end - end -end - -require "./parser" -require "./renderer" -require "./html_renderer" diff --git a/src/compiler/crystal/tools/doc/markdown/parser.cr b/src/compiler/crystal/tools/doc/markdown/parser.cr deleted file mode 100644 index d6fc1509aadb..000000000000 --- a/src/compiler/crystal/tools/doc/markdown/parser.cr +++ /dev/null @@ -1,642 +0,0 @@ -class Crystal::Doc::Markdown::Parser - record PrefixHeader, count : Int32 - record UnorderedList, char : Char - record CodeFence, language : String - - @lines : Array(String) - - def initialize(text : String, @renderer : Renderer) - @lines = text.lines - @line = 0 - end - - def parse - while @line < @lines.size - process_paragraph - end - end - - def process_paragraph - line = @lines[@line] - - case item = classify(line) - when :empty - @line += 1 - when :header1 - render_header 1, line, 2 - when :header2 - render_header 2, line, 2 - when PrefixHeader - render_prefix_header(item.count, line) - when :code - render_code - when :horizontal_rule - render_horizontal_rule - when UnorderedList - render_unordered_list(item.char) - when CodeFence - render_fenced_code(item.language) - when :ordered_list - render_ordered_list - when :quote - render_quote - else - render_paragraph - end - end - - def classify(line) - if empty? line - return :empty - end - - if pounds = count_pounds line - return PrefixHeader.new(pounds) - end - - if line.starts_with? " " - return :code - end - - if horizontal_rule? 
line - return :horizontal_rule - end - - if starts_with_bullet_list_marker?(line, '*') - return UnorderedList.new('*') - end - - if starts_with_bullet_list_marker?(line, '+') - return UnorderedList.new('+') - end - - if starts_with_bullet_list_marker?(line, '-') - return UnorderedList.new('-') - end - - if (code_fence = code_fence?(line)) - return code_fence - end - - if starts_with_digits_dot? line - return :ordered_list - end - - if line.starts_with? ">" - return :quote - end - - if next_line_is_all?('=') - return :header1 - end - - if next_line_is_all?('-') - return :header2 - end - - nil - end - - def render_prefix_header(level, line) - bytesize = line.bytesize - str = line.to_unsafe - pos = level - while pos < bytesize && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - render_header level, line.byte_slice(pos), 1 - end - - def render_header(level : Int32, line : String, increment : Int32) - anchor = line - .underscore # Underscore the string - .gsub(/[^\w\d\s\-.~]/, "") # Delete unsafe URL characters - .strip # Strip leading/trailing whitespace - .gsub(/[\s_-]+/, '-') # Replace `_` and leftover whitespace with `-` - - @renderer.begin_header level, anchor - process_line line - @renderer.end_header level - @line += increment - - append_double_newline_if_has_more - end - - def render_paragraph - @renderer.begin_paragraph - - join_next_lines continue_on: nil - process_line @lines[@line] - @line += 1 - - @renderer.end_paragraph - - append_double_newline_if_has_more - end - - def render_code - @renderer.begin_code nil - - while true - line = @lines[@line] - - break unless has_code_spaces? line - - @renderer.text line.byte_slice(Math.min(line.bytesize, 4)) - @line += 1 - - if @line == @lines.size - break - end - - if next_lines_empty_of_code? 
- break - end - - newline - end - - @renderer.end_code - - append_double_newline_if_has_more - end - - def render_fenced_code(language : String) - line = @lines[@line] - - @renderer.begin_code language.presence - - @line += 1 - - if @line < @lines.size - while true - line = @lines[@line] - - @renderer.text line - @line += 1 - - if (@line == @lines.size) - break - end - - if code_fence? @lines[@line] - @line += 1 - break - end - - newline - end - end - - @renderer.end_code - - append_double_newline_if_has_more - end - - def render_quote - @renderer.begin_quote - - join_next_lines continue_on: :quote - line = @lines[@line] - - process_line line.byte_slice(line.index('>').not_nil! + 1) - - @line += 1 - - @renderer.end_quote - - append_double_newline_if_has_more - end - - def render_unordered_list(prefix = '*') - @renderer.begin_unordered_list - - while true - break unless starts_with_bullet_list_marker?(@lines[@line], prefix) - - join_next_lines continue_on: nil, stop_on: UnorderedList.new(prefix) - line = @lines[@line] - - if empty? line - @line += 1 - - if @line == @lines.size - break - end - - next - end - - if line.starts_with?(" ") && previous_line_is_not_intended_and_starts_with_bullet_list_marker?(prefix) - @renderer.begin_unordered_list - end - - @renderer.begin_list_item - process_line line.byte_slice(line.index(prefix).not_nil! + 1) - @renderer.end_list_item - - if line.starts_with?(" ") && next_line_is_not_intended? - @renderer.end_unordered_list - end - - @line += 1 - - if @line == @lines.size - break - end - end - - @renderer.end_unordered_list - - append_double_newline_if_has_more - end - - def render_ordered_list - @renderer.begin_ordered_list - - while true - break unless starts_with_digits_dot? @lines[@line] - - join_next_lines continue_on: nil, stop_on: :ordered_list - line = @lines[@line] - - if empty? 
line - @line += 1 - - if @line == @lines.size - break - end - - next - end - - @renderer.begin_list_item - process_line line.byte_slice(line.index('.').not_nil! + 1) - @renderer.end_list_item - @line += 1 - - if @line == @lines.size - break - end - end - - @renderer.end_ordered_list - - append_double_newline_if_has_more - end - - def append_double_newline_if_has_more - if @line < @lines.size - newline - newline - end - end - - def process_line(line) - bytesize = line.bytesize - str = line.to_unsafe - pos = 0 - - while pos < bytesize && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - cursor = pos - one_star = false - two_stars = false - one_underscore = false - two_underscores = false - in_link = false - last_is_space = true - - while pos < bytesize - case str[pos].unsafe_chr - when '*' - if pos + 1 < bytesize && str[pos + 1].unsafe_chr == '*' - if two_stars || has_closing?('*', 2, str, (pos + 2), bytesize) - @renderer.text line.byte_slice(cursor, pos - cursor) - pos += 1 - cursor = pos + 1 - if two_stars - @renderer.end_bold - else - @renderer.begin_bold - end - two_stars = !two_stars - end - elsif one_star || has_closing?('*', 1, str, (pos + 1), bytesize) - @renderer.text line.byte_slice(cursor, pos - cursor) - cursor = pos + 1 - if one_star - @renderer.end_italic - else - @renderer.begin_italic - end - one_star = !one_star - end - when '_' - if pos + 1 < bytesize && str[pos + 1].unsafe_chr == '_' - if two_underscores || (last_is_space && has_closing?('_', 2, str, (pos + 2), bytesize)) - @renderer.text line.byte_slice(cursor, pos - cursor) - pos += 1 - cursor = pos + 1 - if two_underscores - @renderer.end_bold - else - @renderer.begin_bold - end - two_underscores = !two_underscores - end - elsif one_underscore || (last_is_space && has_closing?('_', 1, str, (pos + 1), bytesize)) - @renderer.text line.byte_slice(cursor, pos - cursor) - cursor = pos + 1 - if one_underscore - @renderer.end_italic - else - @renderer.begin_italic - end - one_underscore = 
!one_underscore - end - when '`' - if has_closing?('`', 1, str, (pos + 1), bytesize) - @renderer.text line.byte_slice(cursor, pos - cursor) - cursor = pos + 1 - @renderer.begin_inline_code - idx = (str + pos + 1).to_slice(bytesize).index('`'.ord).not_nil! - @renderer.text line.byte_slice(cursor, idx) - pos = pos + 1 + idx - @renderer.end_inline_code - cursor = pos + 1 - end - when '!' - if pos + 1 < bytesize && str[pos + 1] === '[' - link = check_link str, (pos + 2), bytesize - if link - @renderer.text line.byte_slice(cursor, pos - cursor) - - bracket_idx = (str + pos + 2).to_slice(bytesize - pos - 2).index(']'.ord).not_nil! - alt = line.byte_slice(pos + 2, bracket_idx) - - @renderer.image link, alt - - paren_idx = (str + pos + 2 + bracket_idx + 1).to_slice(bytesize - pos - 2 - bracket_idx - 1).index(')'.ord).not_nil! - pos += 2 + bracket_idx + 1 + paren_idx - cursor = pos + 1 - end - end - when '[' - unless in_link - if link = check_link str, (pos + 1), bytesize - @renderer.text line.byte_slice(cursor, pos - cursor) - cursor = pos + 1 - @renderer.begin_link link - in_link = true - end - end - when ']' - if in_link - @renderer.text line.byte_slice(cursor, pos - cursor) - @renderer.end_link - - paren_idx = (str + pos + 1).to_slice(bytesize - pos - 1).index(')'.ord).not_nil! - pos += paren_idx + 1 - cursor = pos + 1 - in_link = false - end - end - last_is_space = pos < bytesize && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - @renderer.text line.byte_slice(cursor, pos - cursor) - end - - def empty?(line) - line_is_all? line, ' ' - end - - def has_closing?(char, count, str, pos, bytesize) - str += pos - bytesize -= pos - idx = str.to_slice(bytesize).index char.ord - return false unless idx - - if count == 2 - return false unless idx + 1 < bytesize && str[idx + 1].unsafe_chr == char - end - - !str[idx - 1].unsafe_chr.ascii_whitespace? 
- end - - def check_link(str, pos, bytesize) - # We need to count nested brackets to do it right - bracket_count = 1 - while pos < bytesize - case str[pos].unsafe_chr - when '[' - bracket_count += 1 - when ']' - bracket_count -= 1 - if bracket_count == 0 - break - end - end - pos += 1 - end - - return nil unless bracket_count == 0 - bracket_idx = pos - - return nil unless str[bracket_idx + 1] === '(' - - paren_idx = (str + bracket_idx + 1).to_slice(bytesize - bracket_idx - 1).index ')'.ord - return nil unless paren_idx - - String.new(Slice.new(str + bracket_idx + 2, paren_idx - 1)) - end - - def next_line_is_all?(char) - return false unless @line + 1 < @lines.size - - line = @lines[@line + 1] - return false if line.empty? - - line_is_all? line, char - end - - def line_is_all?(line, char) - line.each_byte do |byte| - return false if byte != char.ord - end - true - end - - def count_pounds(line) - bytesize = line.bytesize - str = line.to_unsafe - pos = 0 - while pos < bytesize && pos < 6 && str[pos].unsafe_chr == '#' - pos += 1 - end - pos == 0 ? nil : pos - end - - def has_code_spaces?(line) - bytesize = line.bytesize - str = line.to_unsafe - pos = 0 - while pos < bytesize && pos < 4 && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - if pos < 4 - pos == bytesize - else - true - end - end - - def starts_with_bullet_list_marker?(line, prefix = nil) - bytesize = line.bytesize - str = line.to_unsafe - pos = 0 - while pos < bytesize && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - return false unless pos < bytesize - return false unless prefix ? str[pos].unsafe_chr == prefix : (str[pos].unsafe_chr == '*' || str[pos].unsafe_chr == '-' || str[pos].unsafe_chr == '+') - - pos += 1 - - return false unless pos < bytesize - str[pos].unsafe_chr.ascii_whitespace? 
- end - - def previous_line_is_not_intended_and_starts_with_bullet_list_marker?(prefix) - previous_line = @lines[@line - 1] - !previous_line.starts_with?(" ") && starts_with_bullet_list_marker?(previous_line, prefix) - end - - def next_line_is_not_intended? - return true unless @line + 1 < @lines.size - - next_line = @lines[@line + 1] - !next_line.starts_with?(" ") - end - - def code_fence?(line) - return nil unless line.starts_with?("```") - language = line.lstrip('`').strip - return nil if language.includes? '`' - CodeFence.new(language) - end - - def starts_with_digits_dot?(line) - bytesize = line.bytesize - str = line.to_unsafe - pos = 0 - while pos < bytesize && str[pos].unsafe_chr.ascii_whitespace? - pos += 1 - end - - return false unless pos < bytesize - return false unless str[pos].unsafe_chr.ascii_number? - - while pos < bytesize && str[pos].unsafe_chr.ascii_number? - pos += 1 - end - - return false unless pos < bytesize - str[pos].unsafe_chr == '.' - end - - def next_lines_empty_of_code? - line_number = @line - - while line_number < @lines.size - line = @lines[line_number] - - if empty? line - # Nothing - elsif has_code_spaces? line - return false - else - return true - end - - line_number += 1 - end - - return true - end - - def horizontal_rule?(line) - non_space_char = nil - count = 1 - - line.each_char do |char| - next if char.ascii_whitespace? - - if non_space_char - if char == non_space_char - count += 1 - else - return false - end - else - case char - when '*', '-', '_' - non_space_char = char - else - return false - end - end - end - - count >= 3 - end - - def render_horizontal_rule - @renderer.horizontal_rule - @line += 1 - end - - def newline - @renderer.text "\n" - end - - # Join this line with next lines if they form a paragraph, - # until next lines don't start another entity like a list, - # header, etc. 
- def join_next_lines(continue_on = :none, stop_on = :none) - start = @line - line = @line - line += 1 - while line < @lines.size - item = classify(@lines[line]) - - case item - when continue_on - # continue - when stop_on - line -= 1 - break - when nil - # paragraph: continue - else - line -= 1 - break - end - - line += 1 - end - line -= 1 if line == @lines.size - - if line > start - @lines[line] = (start..line).join('\n') { |i| @lines[i] } - @line = line - end - end -end diff --git a/src/compiler/crystal/tools/doc/markdown/renderer.cr b/src/compiler/crystal/tools/doc/markdown/renderer.cr deleted file mode 100644 index b278ac3e827b..000000000000 --- a/src/compiler/crystal/tools/doc/markdown/renderer.cr +++ /dev/null @@ -1,27 +0,0 @@ -module Crystal::Doc::Markdown::Renderer - abstract def begin_paragraph - abstract def end_paragraph - abstract def begin_italic - abstract def end_italic - abstract def begin_bold - abstract def end_bold - abstract def begin_header(level : Int32, anchor : String) : Nil - abstract def end_header(level) - abstract def begin_inline_code - abstract def end_inline_code - abstract def begin_code(language) - abstract def end_code - abstract def begin_quote - abstract def end_quote - abstract def begin_unordered_list - abstract def end_unordered_list - abstract def begin_ordered_list - abstract def end_ordered_list - abstract def begin_list_item - abstract def end_list_item - abstract def begin_link(url) - abstract def end_link - abstract def image(url, alt) - abstract def text(text) - abstract def horizontal_rule -end diff --git a/src/compiler/crystal/tools/doc/type.cr b/src/compiler/crystal/tools/doc/type.cr index faa09f51bf50..6c10b70e646a 100644 --- a/src/compiler/crystal/tools/doc/type.cr +++ b/src/compiler/crystal/tools/doc/type.cr @@ -176,7 +176,7 @@ class Crystal::Doc::Type defs << method(def_with_metadata.def, false) end end - defs.sort_by!(stable: true, &.name.downcase) + defs.sort_by!(&.name.downcase) end end end @@ -201,7 +201,7 
@@ class Crystal::Doc::Type end end end - class_methods.sort_by!(stable: true, &.name.downcase) + class_methods.sort_by!(&.name.downcase) end end @@ -225,7 +225,7 @@ class Crystal::Doc::Type end end end - macros.sort_by!(stable: true, &.name.downcase) + macros.sort_by!(&.name.downcase) end end @@ -786,7 +786,7 @@ class Crystal::Doc::Type builder.field "full_name", full_name builder.field "name", name builder.field "abstract", abstract? - builder.field "superclass" { superclass.try(&.to_json_simple(builder)) || builder.scalar(nil) } + builder.field "superclass" { (s = superclass) ? s.to_json_simple(builder) : builder.null } builder.field "ancestors" do builder.array do ancestors.each &.to_json_simple(builder) @@ -821,7 +821,7 @@ class Crystal::Doc::Type including_types.each &.to_json_simple(builder) end end - builder.field "namespace" { namespace.try(&.to_json_simple(builder)) || builder.scalar(nil) } + builder.field "namespace" { (n = namespace) ? n.to_json_simple(builder) : builder.null } builder.field "doc", doc builder.field "summary", formatted_summary builder.field "class_methods", class_methods diff --git a/src/compiler/crystal/tools/formatter.cr b/src/compiler/crystal/tools/formatter.cr index 1ded41bea0b0..d5f56aed4d32 100644 --- a/src/compiler/crystal/tools/formatter.cr +++ b/src/compiler/crystal/tools/formatter.cr @@ -3568,7 +3568,16 @@ module Crystal write " " unless has_parentheses skip_space - if exp.is_a?(TupleLiteral) && @token.type != :"{" + # If the number of consecutive `{`s starting a tuple literal is 1 less + # than the level of tuple nesting in the actual AST node, this means the + # parser synthesized a TupleLiteral from multiple expressions, e.g. + # + # return {1, 2}, 3, 4 + # return { {1, 2}, 3, 4 } + # + # The tuple depth is 2 in both cases but only 1 leading curly brace is + # present on the first return. 
+ if exp.is_a?(TupleLiteral) && opening_curly_brace_count < leading_tuple_depth(exp) format_args(exp.elements, has_parentheses) skip_space if has_parentheses else @@ -3582,6 +3591,26 @@ module Crystal false end + def opening_curly_brace_count + @lexer.peek_ahead do + count = 0 + while @lexer.token.type == :"{" + count += 1 + @lexer.next_token_skip_space_or_newline + end + count + end + end + + def leading_tuple_depth(exp) + count = 0 + while exp.is_a?(TupleLiteral) + count += 1 + exp = exp.elements.first? + end + count + end + def visit(node : Yield) if scope = node.scope write_keyword :with, " " diff --git a/src/compiler/crystal/tools/playground/server.cr b/src/compiler/crystal/tools/playground/server.cr index b6ddd6d629c3..69b1f80b57db 100644 --- a/src/compiler/crystal/tools/playground/server.cr +++ b/src/compiler/crystal/tools/playground/server.cr @@ -4,7 +4,7 @@ require "http/server" require "log" require "ecr/macros" require "compiler/crystal/tools/formatter" -require "compiler/crystal/tools/doc/markdown" +require "../../../../../lib/markd/src/markd" module Crystal::Playground Log = ::Log.for("crystal.playground") @@ -247,7 +247,7 @@ module Crystal::Playground end if extname == ".md" || extname == ".cr" - content = Crystal::Doc::Markdown.to_html(content) + content = Markd.to_html(content) end content rescue e diff --git a/src/compiler/crystal/tools/print_hierarchy.cr b/src/compiler/crystal/tools/print_hierarchy.cr index 513a53c2574e..b8f4c09cbfba 100644 --- a/src/compiler/crystal/tools/print_hierarchy.cr +++ b/src/compiler/crystal/tools/print_hierarchy.cr @@ -3,23 +3,24 @@ require "colorize" require "../syntax/ast" module Crystal - def self.print_hierarchy(program, exp, format) + def self.print_hierarchy(program, io, exp, format) case format when "text" - HierarchyPrinter.new(program, exp).execute + TextHierarchyPrinter.new(program, io, exp).execute when "json" - JSONHierarchyPrinter.new(program, exp).execute + JSONHierarchyPrinter.new(program, io, 
exp).execute else raise "Unknown hierarchy format: #{format}" end end - class HierarchyPrinter + abstract class HierarchyPrinter + abstract def print_all + @llvm_typer : LLVMTyper def initialize(@program : Program, exp : String?) @exp = exp ? Regex.new(exp) : nil - @indents = [] of Bool @targets = Set(Type).new @llvm_typer = @program.llvm_typer end @@ -29,9 +30,7 @@ module Crystal compute_targets(@program.types, exp, false) end - with_color.light_gray.bold.surround(STDOUT) do - print_type @program.object - end + print_all end def compute_targets(types : Array, exp, must_include = false) @@ -95,6 +94,35 @@ module Crystal false end + def must_print?(type : NonGenericClassType | GenericClassType) + !@exp || @targets.includes?(type) + end + + def must_print?(type) + false + end + + def type_size(type) + @llvm_typer.size_of(@llvm_typer.llvm_struct_type(type)) + end + + def ivar_size(ivar) + @llvm_typer.size_of(@llvm_typer.llvm_embedded_type(ivar.type)) + end + end + + class TextHierarchyPrinter < HierarchyPrinter + def initialize(program : Program, @io : IO, exp : String?) + super(program, exp) + @indents = [] of Bool + end + + def print_all + with_color.light_gray.bold.surround(@io) do + print_type @program.object + end + end + def print_subtypes(types) types = types.sort_by &.to_s types.each_with_index do |type, i| @@ -110,8 +138,7 @@ module Crystal unless @indents.empty? print_indent - print "|" - puts + @io << "|\n" end print_type type @@ -119,36 +146,19 @@ module Crystal def print_type_name(type) print_indent - print "+" unless @indents.empty? - print "- " - print type.struct? ? "struct" : "class" - print " " - print type + @io << "+" unless @indents.empty? + @io << "- " << (type.struct? ? 
"struct" : "class") << " " << type if (type.is_a?(NonGenericClassType) || type.is_a?(GenericClassInstanceType)) && !type.is_a?(PointerInstanceType) && !type.is_a?(ProcInstanceType) - size = @llvm_typer.size_of(@llvm_typer.llvm_struct_type(type)) - with_color.light_gray.surround(STDOUT) do - print " (" - print size.to_s - print " bytes)" + with_color.light_gray.surround(@io) do + @io << " (" << type_size(type) << " bytes)" end end - puts + @io << '\n' end - def print_type(type : NonGenericClassType | GenericClassInstanceType) - print_type_name type - - subtypes = type.subclasses.select { |sub| must_print?(sub) } - print_instance_vars type, !subtypes.empty? - - with_indent do - print_subtypes subtypes - end - end - - def print_type(type : GenericClassType) + def print_type(type : GenericClassType | NonGenericClassType | GenericClassInstanceType) print_type_name type subtypes = type.subclasses.select { |sub| must_print?(sub) } @@ -171,19 +181,13 @@ module Crystal instance_vars.each do |name, var| print_indent - print (@indents.last ? "|" : " ") - if has_subtypes - print " . " - else - print " " - end + @io << (@indents.last ? "|" : " ") << (has_subtypes ? " . " : " ") - with_color.light_gray.surround(STDOUT) do - print name.ljust(max_name_size) - print " : " - print var + with_color.light_gray.surround(@io) do + name.ljust(@io, max_name_size) + @io << " : " << var end - puts + @io << '\n' end end @@ -196,63 +200,40 @@ module Crystal max_name_size = instance_vars.max_of &.name.size - if typed_instance_vars.empty? - max_type_size = 0 - max_bytes_size = 0 - else - max_type_size = typed_instance_vars.max_of &.type.to_s.size - max_bytes_size = typed_instance_vars.max_of { |var| @llvm_typer.size_of(@llvm_typer.llvm_embedded_type(var.type)).to_s.size } - end + max_type_size = typed_instance_vars.max_of?(&.type.to_s.size) || 0 + max_bytes_size = typed_instance_vars.max_of? 
{ |var| ivar_size(var).to_s.size } || 0 instance_vars.each do |ivar| print_indent - print (@indents.last ? "|" : " ") - if has_subtypes - print " . " - else - print " " - end + @io << (@indents.last ? "|" : " ") << (has_subtypes ? " . " : " ") - with_color.light_gray.surround(STDOUT) do - print ivar.name.ljust(max_name_size) - print " : " + with_color.light_gray.surround(@io) do + ivar.name.ljust(@io, max_name_size) + @io << " : " if ivar_type = ivar.type? - print ivar_type.to_s.ljust(max_type_size) - size = @llvm_typer.size_of(@llvm_typer.llvm_embedded_type(ivar_type)) - with_color.light_gray.surround(STDOUT) do - print " (" - print size.to_s.rjust(max_bytes_size) - print " bytes)" + ivar_type.to_s.ljust(@io, max_type_size) + with_color.light_gray.surround(@io) do + @io << " (" + ivar_size(ivar).to_s.rjust(@io, max_bytes_size) + @io << " bytes)" end else - print "MISSING".colorize.red.bright + @io << "MISSING".colorize.red.bright end end - puts + @io << '\n' end end - def must_print?(type : NonGenericClassType) - !(@exp && !@targets.includes?(type)) - end - - def must_print?(type : GenericClassType) - !(@exp && !@targets.includes?(type)) - end - - def must_print?(type) - false - end - def print_indent unless @indents.empty? - print " " + @io << " " 0.upto(@indents.size - 2) do |i| indent = @indents[i] if indent - print "| " + @io << "| " else - print " " + @io << " " end end end @@ -270,27 +251,28 @@ module Crystal end class JSONHierarchyPrinter < HierarchyPrinter - def execute - if exp = @exp - compute_targets(@program.types, exp, false) - end + def initialize(program : Program, io : IO, exp : String?) 
+ super(program, exp) + @json = JSON::Builder.new(io) + end - JSON.build(STDOUT) do |json| - json.object do - print_type(@program.object, json) + def print_all + @json.document do + @json.object do + print_type(@program.object) end end end - def print_subtypes(types, json) + def print_subtypes(types) types = types.sort_by &.to_s - json.field "sub_types" do - json.array do + @json.field "sub_types" do + @json.array do types.each_with_index do |type, index| if must_print? type - json.object do - print_type(type, json) + @json.object do + print_type(type) end end end @@ -298,57 +280,57 @@ module Crystal end end - def print_type_name(type, json) - json.field "name", type.to_s - json.field "kind", type.struct? ? "struct" : "class" + def print_type_name(type) + @json.field "name", type.to_s + @json.field "kind", type.struct? ? "struct" : "class" if (type.is_a?(NonGenericClassType) || type.is_a?(GenericClassInstanceType)) && !type.is_a?(PointerInstanceType) && !type.is_a?(ProcInstanceType) - json.field "size_in_bytes", @llvm_typer.size_of(@llvm_typer.llvm_struct_type(type)) + @json.field "size_in_bytes", type_size(type) end end - def print_type(type : GenericClassType | NonGenericClassType | GenericClassInstanceType, json) - print_type_name(type, json) + def print_type(type : GenericClassType | NonGenericClassType | GenericClassInstanceType) + print_type_name(type) subtypes = type.subclasses.select { |sub| must_print?(sub) } - print_instance_vars(type, !subtypes.empty?, json) - print_subtypes(subtypes, json) + print_instance_vars(type, !subtypes.empty?) + print_subtypes(subtypes) end - def print_type(type, json) + def print_type(type) # Nothing to do end - def print_instance_vars(type : GenericClassType, has_subtypes, json) + def print_instance_vars(type : GenericClassType, has_subtypes) instance_vars = type.instance_vars return if instance_vars.empty? 
- json.field "instance_vars" do - json.array do + @json.field "instance_vars" do + @json.array do instance_vars.each do |name, var| - json.object do - json.field "name", name.to_s - json.field "type", var.to_s + @json.object do + @json.field "name", name.to_s + @json.field "type", var.to_s end end end end end - def print_instance_vars(type, has_subtypes, json) + def print_instance_vars(type, has_subtypes) instance_vars = type.instance_vars return if instance_vars.empty? instance_vars = instance_vars.values - json.field "instance_vars" do - json.array do + @json.field "instance_vars" do + @json.array do instance_vars.each do |instance_var| if ivar_type = instance_var.type? - json.object do - json.field "name", instance_var.name.to_s - json.field "type", ivar_type.to_s - json.field "size_in_bytes", @llvm_typer.size_of(@llvm_typer.llvm_embedded_type(ivar_type)) + @json.object do + @json.field "name", instance_var.name.to_s + @json.field "type", ivar_type.to_s + @json.field "size_in_bytes", ivar_size(instance_var) end end end diff --git a/src/compiler/crystal/types.cr b/src/compiler/crystal/types.cr index c0188f475c03..a527208a4bbf 100644 --- a/src/compiler/crystal/types.cr +++ b/src/compiler/crystal/types.cr @@ -294,8 +294,8 @@ module Crystal end end - def filter_by(other_type) - restrict other_type, MatchContext.new(self, self, strict: true) + def filter_by(other_type : Type) + Type.common_descendent(self, other_type) end def filter_by_responds_to(name) @@ -1603,6 +1603,8 @@ module Crystal end def run_instance_var_initializer(initializer, instance : GenericClassInstanceType | NonGenericClassType) + return if instance.unbound? + meta_vars = MetaVars.new visitor = MainVisitor.new(program, vars: meta_vars, meta_vars: meta_vars) visitor.scope = instance.metaclass @@ -3152,6 +3154,9 @@ module Crystal property? used = false property? visited = false + # Was this const's value cleaned up by CleanupTransformer yet? + property? 
cleaned_up = false + # Is this constant accessed with pointerof(...)? property? pointer_read = false @@ -3339,6 +3344,10 @@ module Crystal delegate lookup_first_def, to: instance_type.metaclass + def replace_type_parameters(instance) + base_type.replace_type_parameters(instance).virtual_type.metaclass + end + def each_concrete_type instance_type.subtypes.each do |type| yield type.metaclass diff --git a/src/compress/deflate/reader.cr b/src/compress/deflate/reader.cr index 9a185d809a9c..bde3a187d0fb 100644 --- a/src/compress/deflate/reader.cr +++ b/src/compress/deflate/reader.cr @@ -130,7 +130,7 @@ class Compress::Deflate::Reader < IO end # Closes this reader. - def unbuffered_close + def unbuffered_close : Nil return if @closed @closed = true @@ -140,7 +140,7 @@ class Compress::Deflate::Reader < IO @io.close if @sync_close end - def unbuffered_rewind + def unbuffered_rewind : Nil check_open @io.rewind diff --git a/src/compress/deflate/writer.cr b/src/compress/deflate/writer.cr index e844604f9b6a..1efcef87cdaa 100644 --- a/src/compress/deflate/writer.cr +++ b/src/compress/deflate/writer.cr @@ -54,7 +54,7 @@ class Compress::Deflate::Writer < IO end # See `IO#flush`. - def flush + def flush : Nil return if @closed consume_output LibZ::Flush::SYNC_FLUSH @@ -62,7 +62,7 @@ class Compress::Deflate::Writer < IO end # Closes this writer. Must be invoked after all data has been written. - def close + def close : Nil return if @closed @closed = true diff --git a/src/compress/gzip/reader.cr b/src/compress/gzip/reader.cr index 63b5530dcad7..56dac99326d9 100644 --- a/src/compress/gzip/reader.cr +++ b/src/compress/gzip/reader.cr @@ -138,7 +138,7 @@ class Compress::Gzip::Reader < IO end # Closes this reader. 
- def unbuffered_close + def unbuffered_close : Nil return if @closed @closed = true @@ -146,7 +146,7 @@ class Compress::Gzip::Reader < IO @io.close if @sync_close end - def unbuffered_rewind + def unbuffered_rewind : Nil check_open @io.rewind diff --git a/src/compress/gzip/writer.cr b/src/compress/gzip/writer.cr index 9cdbe02a370c..3d82548e2d2c 100644 --- a/src/compress/gzip/writer.cr +++ b/src/compress/gzip/writer.cr @@ -88,7 +88,7 @@ class Compress::Gzip::Writer < IO # data has been written yet. # # See `IO#flush`. - def flush + def flush : Nil check_open flate_io = write_header @@ -96,7 +96,7 @@ class Compress::Gzip::Writer < IO end # Closes this writer. Must be invoked after all data has been written. - def close + def close : Nil return if @closed @closed = true diff --git a/src/compress/zip/file.cr b/src/compress/zip/file.cr index 6ac8f6972022..843b0758f0a4 100644 --- a/src/compress/zip/file.cr +++ b/src/compress/zip/file.cr @@ -75,7 +75,7 @@ class Compress::Zip::File end # Closes this zip file. - def close + def close : Nil return if @closed @closed = true if @sync_close diff --git a/src/compress/zip/reader.cr b/src/compress/zip/reader.cr index 080620536a97..9763eb71d2e4 100644 --- a/src/compress/zip/reader.cr +++ b/src/compress/zip/reader.cr @@ -101,7 +101,7 @@ class Compress::Zip::Reader end # Closes this zip reader. - def close + def close : Nil return if @closed @closed = true @io.close if @sync_close diff --git a/src/compress/zip/writer.cr b/src/compress/zip/writer.cr index 2acd1570765d..5c5741d7371c 100644 --- a/src/compress/zip/writer.cr +++ b/src/compress/zip/writer.cr @@ -151,14 +151,14 @@ class Compress::Zip::Writer end # Adds an entry that will have *string* as its contents. - def add(filename_or_entry : String | Entry, string : String) + def add(filename_or_entry : String | Entry, string : String) : Nil add(filename_or_entry) do |io| io << string end end # Adds an entry that will have *bytes* as its contents. 
- def add(filename_or_entry : String | Entry, bytes : Bytes) + def add(filename_or_entry : String | Entry, bytes : Bytes) : Nil add(filename_or_entry) do |io| io.write(bytes) end @@ -167,7 +167,7 @@ class Compress::Zip::Writer # Adds an entry that will have its data copied from the given *data*. # If the given *data* is a `::File`, it is automatically closed # after data is copied from it. - def add(filename_or_entry : String | Entry, data : IO) + def add(filename_or_entry : String | Entry, data : IO) : Nil add(filename_or_entry) do |io| IO.copy(data, io) data.close if data.is_a?(::File) @@ -175,13 +175,13 @@ class Compress::Zip::Writer end # Adds a directory entry that will have the given *name*. - def add_dir(name) + def add_dir(name) : Nil name = name + '/' unless name.ends_with?('/') add(Entry.new(name)) { } end # Closes this zip writer. - def close + def close : Nil return if @closed @closed = true diff --git a/src/compress/zlib/reader.cr b/src/compress/zlib/reader.cr index df6eecb84ffe..953e313e255a 100644 --- a/src/compress/zlib/reader.cr +++ b/src/compress/zlib/reader.cr @@ -89,7 +89,7 @@ class Compress::Zlib::Reader < IO raise IO::Error.new "Can't flush Compress::Zlib::Reader" end - def unbuffered_close + def unbuffered_close : Nil return if @closed @closed = true @@ -97,7 +97,7 @@ class Compress::Zlib::Reader < IO @io.close if @sync_close end - def unbuffered_rewind + def unbuffered_rewind : Nil check_open @io.rewind diff --git a/src/compress/zlib/writer.cr b/src/compress/zlib/writer.cr index 03e27cf07ab5..bb711be40dc0 100644 --- a/src/compress/zlib/writer.cr +++ b/src/compress/zlib/writer.cr @@ -59,7 +59,7 @@ class Compress::Zlib::Writer < IO # data has been written yet. # # See `IO#flush`. - def flush + def flush : Nil check_open write_header unless @wrote_header @@ -67,7 +67,7 @@ class Compress::Zlib::Writer < IO end # Closes this writer. Must be invoked after all data has been written. 
- def close + def close : Nil return if @closed @closed = true diff --git a/src/crystal/system/unix/file.cr b/src/crystal/system/unix/file.cr index a6808e21d9a9..850a32fa0f28 100644 --- a/src/crystal/system/unix/file.cr +++ b/src/crystal/system/unix/file.cr @@ -130,10 +130,10 @@ module Crystal::System::File raise ::File::Error.from_os_error("Cannot read link", Errno::ENAMETOOLONG, file: path) end - def self.rename(old_filename, new_filename) + def self.rename(old_filename, new_filename) : ::File::Error? code = LibC.rename(old_filename.check_no_null_byte, new_filename.check_no_null_byte) if code != 0 - raise ::File::Error.from_errno("Error renaming file", file: old_filename, other: new_filename) + ::File::Error.from_errno("Error renaming file", file: old_filename, other: new_filename) end end diff --git a/src/crystal/system/win32/file.cr b/src/crystal/system/win32/file.cr index 8ecdbb6893bc..756e443976e2 100644 --- a/src/crystal/system/win32/file.cr +++ b/src/crystal/system/win32/file.cr @@ -203,9 +203,9 @@ module Crystal::System::File raise NotImplementedError.new("readlink") end - def self.rename(old_path : String, new_path : String) : Nil + def self.rename(old_path : String, new_path : String) : ::File::Error? if LibC.MoveFileExW(to_windows_path(old_path), to_windows_path(new_path), LibC::MOVEFILE_REPLACE_EXISTING) == 0 - raise ::File::Error.from_winerror("Error renaming file", file: old_path, other: new_path) + ::File::Error.from_winerror("Error renaming file", file: old_path, other: new_path) end end diff --git a/src/csv.cr b/src/csv.cr index 1b8e89e07a1f..494931d8f10e 100644 --- a/src/csv.cr +++ b/src/csv.cr @@ -296,7 +296,7 @@ class CSV end # Rewinds this CSV to the beginning, rewinding the underlying IO if any. 
- def rewind + def rewind : Nil @parser.rewind @parser.next_row if @headers @traversed = false diff --git a/src/csv/builder.cr b/src/csv/builder.cr index 49c8c2149cbd..49410f9647ac 100644 --- a/src/csv/builder.cr +++ b/src/csv/builder.cr @@ -60,7 +60,7 @@ class CSV::Builder end # Appends the given values as a single row, and then a newline. - def row(values : Enumerable) + def row(values : Enumerable) : Nil row do |row| values.each do |value| row << value @@ -69,7 +69,7 @@ class CSV::Builder end # :ditto: - def row(*values) + def row(*values) : Nil row values end @@ -111,7 +111,7 @@ class CSV::Builder end # Appends the given value to this row. - def <<(value : String) + def <<(value : String) : Nil if needs_quotes?(value) @builder.quote_cell value else @@ -120,7 +120,7 @@ class CSV::Builder end # :ditto: - def <<(value : Nil | Bool | Number) + def <<(value : Nil | Bool | Number) : Nil case @quoting when .all? @builder.cell { |io| @@ -134,7 +134,7 @@ class CSV::Builder end # :ditto: - def <<(value) + def <<(value) : Nil self << value.to_s end @@ -151,7 +151,7 @@ class CSV::Builder end # Appends a comma, thus skipping a cell. 
- def skip_cell + def skip_cell : Nil self << nil end diff --git a/src/csv/lexer.cr b/src/csv/lexer.cr index 2656d53656b1..634ec716b303 100644 --- a/src/csv/lexer.cr +++ b/src/csv/lexer.cr @@ -39,7 +39,7 @@ abstract class CSV::Lexer end # Rewinds this lexer to the beginning - def rewind + def rewind : Nil @column_number = 1 @line_number = 1 @last_empty_column = false diff --git a/src/csv/lexer/io_based.cr b/src/csv/lexer/io_based.cr index 253e0b2f8a3f..f33e83d99d9e 100644 --- a/src/csv/lexer/io_based.cr +++ b/src/csv/lexer/io_based.cr @@ -7,7 +7,7 @@ class CSV::Lexer::IOBased < CSV::Lexer @current_char = @io.read_char || '\0' end - def rewind + def rewind : Nil super @io.rewind @current_char = @io.read_char || '\0' diff --git a/src/csv/lexer/string_based.cr b/src/csv/lexer/string_based.cr index fdffd072ed6e..c9d1193adc34 100644 --- a/src/csv/lexer/string_based.cr +++ b/src/csv/lexer/string_based.cr @@ -11,7 +11,7 @@ class CSV::Lexer::StringBased < CSV::Lexer end end - def rewind + def rewind : Nil super @reader.pos = 0 if @reader.current_char == '\n' diff --git a/src/csv/parser.cr b/src/csv/parser.cr index 987e3bb150b9..ae3ec7c890d8 100644 --- a/src/csv/parser.cr +++ b/src/csv/parser.cr @@ -54,7 +54,7 @@ class CSV::Parser end # Rewinds this parser to the beginning. - def rewind + def rewind : Nil @lexer.rewind end diff --git a/src/deque.cr b/src/deque.cr index 0d55c3abe196..03c7ec4f232c 100644 --- a/src/deque.cr +++ b/src/deque.cr @@ -493,7 +493,7 @@ class Deque(T) # # * For positive *n*, equivalent to `n.times { push(shift) }`. # * For negative *n*, equivalent to `(-n).times { unshift(pop) }`. - def rotate!(n : Int = 1) + def rotate!(n : Int = 1) : Nil return if @size <= 1 if @size == @capacity @start = (@start + n) % @capacity diff --git a/src/enumerable.cr b/src/enumerable.cr index bebb24eff5f8..d8ad4204bcae 100644 --- a/src/enumerable.cr +++ b/src/enumerable.cr @@ -34,7 +34,7 @@ module Enumerable(T) end # Must yield this collection's elements to the block. 
- abstract def each(&block : T -> _) + abstract def each(&block : T ->) # Returns `true` if the passed block returns a value other than `false` or `nil` # for all elements of the collection. @@ -222,7 +222,7 @@ module Enumerable(T) # ["Alice", "Bob"].compact_map { |name| name.match(/^A./) } # => [Regex::MatchData("Al")] # ``` def compact_map - ary = [] of typeof((yield first).not_nil!) + ary = [] of typeof((yield Enumerable.element_type(self)).not_nil!) each do |e| v = yield e unless v.is_a?(Nil) @@ -525,7 +525,7 @@ module Enumerable(T) # array # => ['A', 'l', 'i', 'c', 'e', 'B', 'o', 'b'] # ``` def flat_map(&block : T -> _) - ary = [] of typeof(flat_map_type(yield first)) + ary = [] of typeof(flat_map_type(yield Enumerable.element_type(self))) each do |e| case v = yield e when Array, Iterator @@ -668,14 +668,26 @@ module Enumerable(T) # For each element in the collection the block is passed an accumulator value (*memo*) and the element. The # result becomes the new value for *memo*. At the end of the iteration, the final value of *memo* is # the return value for the method. The initial value for the accumulator is the first element in the collection. + # If the collection has only one element, that element is returned. # # Raises `Enumerable::EmptyError` if the collection is empty. # # ``` # [1, 2, 3, 4, 5].reduce { |acc, i| acc + i } # => 15 + # [1].reduce { |acc, i| acc + i } # => 1 + # ([] of Int32).reduce { |acc, i| acc + i } # raises Enumerable::EmptyError + # ``` + # + # The block is not required to return a `T`, in which case the accumulator's + # type includes whatever the block returns. 
+ # + # ``` + # # `acc` is an `Int32 | String` + # [1, 2, 3, 4, 5].reduce { |acc, i| "#{acc}-#{i}" } # => "1-2-3-4-5" + # [1].reduce { |acc, i| "#{acc}-#{i}" } # => 1 # ``` def reduce - memo = uninitialized T + memo = uninitialized typeof(reduce(Enumerable.element_type(self)) { |acc, i| yield acc, i }) found = false each do |elem| @@ -706,7 +718,7 @@ module Enumerable(T) # ([] of Int32).reduce? { |acc, i| acc + i } # => nil # ``` def reduce? - memo = uninitialized T + memo = uninitialized typeof(reduce(Enumerable.element_type(self)) { |acc, i| yield acc, i }) found = false each do |elem| @@ -1304,7 +1316,7 @@ module Enumerable(T) # ``` def reject(type : U.class) forall U ary = [] of typeof(begin - e = first + e = Enumerable.element_type(self) e.is_a?(U) ? raise("") : e end) each { |e| ary << e unless e.is_a?(U) } @@ -1551,7 +1563,7 @@ module Enumerable(T) # ([] of Int32).sum { |x| x + 1 } # => 0 # ``` def sum(&block) - sum(additive_identity(Reflect(typeof(yield first)))) do |value| + sum(additive_identity(Reflect(typeof(yield Enumerable.element_type(self))))) do |value| yield value end end @@ -1630,7 +1642,7 @@ module Enumerable(T) # ([] of Int32).product { |x| x + 1 } # => 1 # ``` def product(&block) - product(Reflect(typeof(yield first)).first.multiplicative_identity) do |value| + product(Reflect(typeof(yield Enumerable.element_type(self))).first.multiplicative_identity) do |value| yield value end end @@ -1721,7 +1733,7 @@ module Enumerable(T) # Tuple.new({:a, 1}, {:c, 2}).to_h # => {:a => 1, :c => 2} # ``` def to_h - each_with_object(Hash(typeof(first[0]), typeof(first[1])).new) do |item, hash| + each_with_object(Hash(typeof(Enumerable.element_type(self)[0]), typeof(Enumerable.element_type(self)[1])).new) do |item, hash| hash[item[0]] = item[1] end end diff --git a/src/exception/call_stack.cr b/src/exception/call_stack.cr index f154dfe6f600..eb4045cedf8e 100644 --- a/src/exception/call_stack.cr +++ b/src/exception/call_stack.cr @@ -29,7 +29,7 @@ struct 
Exception::CallStack @@skip = [] of String - def self.skip(filename) + def self.skip(filename) : Nil @@skip << filename end @@ -106,7 +106,7 @@ struct Exception::CallStack end end - def self.print_backtrace + def self.print_backtrace : Nil backtrace_fn = ->(context : LibUnwind::Context, data : Void*) do last_frame = data.as(RepeatedFrame*) diff --git a/src/fiber/stack_pool.cr b/src/fiber/stack_pool.cr index f3527b949d22..54d03e4ffa5f 100644 --- a/src/fiber/stack_pool.cr +++ b/src/fiber/stack_pool.cr @@ -29,7 +29,7 @@ class Fiber end # Appends a stack to the bottom of the pool. - def release(stack) + def release(stack) : Nil @mutex.synchronize { @deque.push(stack) } end diff --git a/src/file.cr b/src/file.cr index ccbc61384d39..3659a7f50792 100644 --- a/src/file.cr +++ b/src/file.cr @@ -398,7 +398,7 @@ class File < IO::FileDescriptor File.expand_brace_pattern(pattern, expanded_patterns) expanded_patterns.each do |expanded_pattern| - return true if match_single_pattern(expanded_pattern, path) + return true if match_single_pattern(expanded_pattern, path.to_s) end false end @@ -587,12 +587,12 @@ class File < IO::FileDescriptor # Creates a new link (also known as a hard link) at *new_path* to an existing file # given by *old_path*. - def self.link(old_path : Path | String, new_path : Path | String) + def self.link(old_path : Path | String, new_path : Path | String) : Nil Crystal::System::File.link(old_path.to_s, new_path.to_s) end # Creates a symbolic link at *new_path* to an existing file given by *old_path*. 
- def self.symlink(old_path : Path | String, new_path : Path | String) + def self.symlink(old_path : Path | String, new_path : Path | String) : Nil Crystal::System::File.symlink(old_path.to_s, new_path.to_s) end @@ -773,7 +773,9 @@ class File < IO::FileDescriptor # File.exists?("afile.cr") # => true # ``` def self.rename(old_filename : Path | String, new_filename : Path | String) : Nil - Crystal::System::File.rename(old_filename.to_s, new_filename.to_s) + if error = Crystal::System::File.rename(old_filename.to_s, new_filename.to_s) + raise error + end end # Sets the access and modification times of *filename*. diff --git a/src/file/preader.cr b/src/file/preader.cr index e4999e80df53..9bd9fb73d490 100644 --- a/src/file/preader.cr +++ b/src/file/preader.cr @@ -35,11 +35,11 @@ class File::PReader < IO raise IO::Error.new("Can't flush read-only IO") end - def unbuffered_rewind + def unbuffered_rewind : Nil @pos = 0 end - def unbuffered_close + def unbuffered_close : Nil @closed = true end end diff --git a/src/file_utils.cr b/src/file_utils.cr index ca424ddb6cbb..e82e3c1e15f5 100644 --- a/src/file_utils.cr +++ b/src/file_utils.cr @@ -10,7 +10,7 @@ module FileUtils # ``` # # NOTE: Alias of `Dir.cd` - def cd(path : Path | String) + def cd(path : Path | String) : Nil Dir.cd(path) end @@ -89,7 +89,7 @@ module FileUtils # File.info("afile_copy").permissions.value # => 0o600 # ``` def cp(src_path : Path | String, dest : Path | String) : Nil - dest += File::SEPARATOR + File.basename(src_path) if Dir.exists?(dest) + dest = Path[dest, File.basename(src_path)] if Dir.exists?(dest) File.copy(src_path, dest) end @@ -147,7 +147,7 @@ module FileUtils # # Create a hard link, pointing from /tmp/foo.c to foo.c # FileUtils.ln("foo.c", "/tmp") # ``` - def ln(src_path : Path | String, dest_path : Path | String) + def ln(src_path : Path | String, dest_path : Path | String) : Nil if Dir.exists?(dest_path) File.link(src_path, File.join(dest_path, File.basename(src_path))) else @@ -183,7 
+183,7 @@ module FileUtils # # Create a symbolic link pointing from /tmp/src to src # FileUtils.ln_s("src", "/tmp") # ``` - def ln_s(src_path : Path | String, dest_path : Path | String) + def ln_s(src_path : Path | String, dest_path : Path | String) : Nil if Dir.exists?(dest_path) File.symlink(src_path, File.join(dest_path, File.basename(src_path))) else @@ -217,7 +217,7 @@ module FileUtils # # Create a symbolic link pointing from bar.c to foo.c, even if bar.c already exists # FileUtils.ln_sf("foo.c", "bar.c") # ``` - def ln_sf(src_path : Path | String, dest_path : Path | String) + def ln_sf(src_path : Path | String, dest_path : Path | String) : Nil if File.directory?(dest_path) dest_path = File.join(dest_path, File.basename(src_path)) end @@ -306,15 +306,20 @@ module FileUtils # Moves *src_path* to *dest_path*. # + # NOTE: If *src_path* and *dest_path* exist on different mounted filesystems, + # the file at *src_path* is copied to *dest_path* and then removed. + # # ``` # require "file_utils" # # FileUtils.mv("afile", "afile.cr") # ``` - # - # NOTE: Alias of `File.rename` def mv(src_path : Path | String, dest_path : Path | String) : Nil - File.rename(src_path, dest_path) + if error = Crystal::System::File.rename(src_path.to_s, dest_path.to_s) + raise error unless Errno.value.in?(Errno::EXDEV, Errno::EPERM) + cp_r(src_path, dest_path) + rm_r(src_path) + end end # Moves every *srcs* to *dest*. diff --git a/src/float.cr b/src/float.cr index 6a80d730fbf4..79308547933e 100644 --- a/src/float.cr +++ b/src/float.cr @@ -181,6 +181,16 @@ struct Float32 LibM.trunc_f32(self) end + # Returns the least `Float32` that is greater than `self`. + def next_float : Float32 + LibM.nextafter_f32(self, INFINITY) + end + + # Returns the greatest `Float32` that is less than `self`. 
+ def prev_float : Float32 + LibM.nextafter_f32(self, -INFINITY) + end + def **(other : Int32) {% if flag?(:win32) %} self ** other.to_f32 @@ -288,6 +298,16 @@ struct Float64 LibM.trunc_f64(self) end + # Returns the least `Float64` that is greater than `self`. + def next_float : Float64 + LibM.nextafter_f64(self, INFINITY) + end + + # Returns the greatest `Float64` that is less than `self`. + def prev_float : Float64 + LibM.nextafter_f64(self, -INFINITY) + end + def **(other : Int32) {% if flag?(:win32) %} self ** other.to_f64 diff --git a/src/float/printer.cr b/src/float/printer.cr index 244cd9b36559..bd1b307410a6 100644 --- a/src/float/printer.cr +++ b/src/float/printer.cr @@ -47,7 +47,7 @@ module Float::Printer LibC.snprintf(buffer.to_unsafe, BUFFER_SIZE, "%g", v.to_f64) end len = LibC.strlen(buffer) - io.write_utf8 buffer.to_slice[0, len] + io.write_string buffer.to_slice[0, len] return end @@ -65,11 +65,11 @@ module Float::Printer # add integer part digits if decimal_exponent > 0 && !exp_mode # whole number but not big enough to be exp form - io.write_utf8 buffer.to_slice[i, length - i] + io.write_string buffer.to_slice[i, length - i] i = length (point - length).times { io << '0' } elsif i < point - io.write_utf8 buffer.to_slice[i, point - i] + io.write_string buffer.to_slice[i, point - i] i = point end @@ -81,7 +81,7 @@ module Float::Printer end # add fractional part digits - io.write_utf8 buffer.to_slice[i, length - i] + io.write_string buffer.to_slice[i, length - i] i = length # print trailing 0 if whole number or exp notation of power of ten diff --git a/src/http/client.cr b/src/http/client.cr index be561d7b6d35..e0e8f25a00d5 100644 --- a/src/http/client.cr +++ b/src/http/client.cr @@ -259,7 +259,7 @@ class HTTP::Client # Configures this client to perform basic authentication in every # request. 
- def basic_auth(username, password) + def basic_auth(username, password) : Nil header = "Basic #{Base64.strict_encode("#{username}:#{password}")}" before_request do |request| request.headers["Authorization"] = header @@ -398,7 +398,7 @@ class HTTP::Client # end # client.get "/" # ``` - def before_request(&callback : HTTP::Request ->) + def before_request(&callback : HTTP::Request ->) : Nil before_request = @before_request ||= [] of (HTTP::Request ->) before_request << callback end diff --git a/src/http/cookie.cr b/src/http/cookie.cr index 0ef0b05c0b90..a5707d386cb6 100644 --- a/src/http/cookie.cr +++ b/src/http/cookie.cr @@ -101,7 +101,7 @@ module HTTP end end - def to_cookie_header(io) + def to_cookie_header(io) : Nil io << @name io << '=' io << @value diff --git a/src/http/formdata/builder.cr b/src/http/formdata/builder.cr index 983451f25865..17132bf0af72 100644 --- a/src/http/formdata/builder.cr +++ b/src/http/formdata/builder.cr @@ -39,7 +39,7 @@ module HTTP::FormData # Adds a form part with the given *name* and *value*. *Headers* can # optionally be provided for the form part. - def field(name : String, value, headers : HTTP::Headers = HTTP::Headers.new) + def field(name : String, value, headers : HTTP::Headers = HTTP::Headers.new) : Nil file(name, IO::Memory.new(value.to_s), headers: headers) end @@ -68,7 +68,7 @@ module HTTP::FormData # Finalizes the multipart message, this method must be called before the # generated multipart message written to the IO is considered valid. - def finish + def finish : Nil fail "Cannot finish form-data: no body parts" if @state == :START fail "Cannot finish form-data: already finished" if @state == :FINISHED diff --git a/src/http/server.cr b/src/http/server.cr index 9ad6fc64ae4c..9a62c5f8b07a 100644 --- a/src/http/server.cr +++ b/src/http/server.cr @@ -480,7 +480,7 @@ class HTTP::Server # Gracefully terminates the server. It will process currently accepted # requests, but it won't accept new connections. 
- def close + def close : Nil raise "Can't close server, it's already closed" if closed? @closed = true diff --git a/src/http/server/handlers/error_handler.cr b/src/http/server/handlers/error_handler.cr index c70f210bf925..9468917674ee 100644 --- a/src/http/server/handlers/error_handler.cr +++ b/src/http/server/handlers/error_handler.cr @@ -12,7 +12,7 @@ class HTTP::ErrorHandler def initialize(@verbose : Bool = false, @log = Log.for("http.server")) end - def call(context) + def call(context) : Nil begin call_next(context) rescue ex : HTTP::Server::ClientError diff --git a/src/http/server/handlers/log_handler.cr b/src/http/server/handlers/log_handler.cr index 36d1c90c7a91..8177ede9c712 100644 --- a/src/http/server/handlers/log_handler.cr +++ b/src/http/server/handlers/log_handler.cr @@ -8,7 +8,7 @@ class HTTP::LogHandler def initialize(@log = Log.for("http.server")) end - def call(context) + def call(context) : Nil start = Time.monotonic begin diff --git a/src/http/server/handlers/static_file_handler.cr b/src/http/server/handlers/static_file_handler.cr index e70b694cb34f..02e782df2968 100644 --- a/src/http/server/handlers/static_file_handler.cr +++ b/src/http/server/handlers/static_file_handler.cr @@ -28,7 +28,7 @@ class HTTP::StaticFileHandler @directory_listing = !!directory_listing end - def call(context) + def call(context) : Nil unless context.request.method.in?("GET", "HEAD") if @fallthrough call_next(context) diff --git a/src/http/server/handlers/websocket_handler.cr b/src/http/server/handlers/websocket_handler.cr index e3a8b64ba3a2..1d9a20ccd354 100644 --- a/src/http/server/handlers/websocket_handler.cr +++ b/src/http/server/handlers/websocket_handler.cr @@ -20,7 +20,7 @@ class HTTP::WebSocketHandler def initialize(&@proc : WebSocket, Server::Context ->) end - def call(context) + def call(context) : Nil unless websocket_upgrade_request? 
context.request return call_next context end diff --git a/src/http/server/request_processor.cr b/src/http/server/request_processor.cr index 7b72dd3db744..0ecb2e1ccd9c 100644 --- a/src/http/server/request_processor.cr +++ b/src/http/server/request_processor.cr @@ -18,7 +18,7 @@ class HTTP::Server::RequestProcessor @wants_close = false end - def close + def close : Nil @wants_close = true end diff --git a/src/http/server/response.cr b/src/http/server/response.cr index df15aaaa9a6b..1df0d9bfb73d 100644 --- a/src/http/server/response.cr +++ b/src/http/server/response.cr @@ -98,19 +98,19 @@ class HTTP::Server # Upgrades this response, writing headers and yielding the connection `IO` (a socket) to the given block. # This is useful to implement protocol upgrades, such as websockets. - def upgrade(&block : IO ->) + def upgrade(&block : IO ->) : Nil write_headers @upgrade_handler = block end # Flushes the output. This method must be implemented if wrapping the response output. - def flush + def flush : Nil @output.flush end # Closes this response, writing headers and body if not done yet. # This method must be implemented if wrapping the response output. - def close + def close : Nil return if closed? @output.close @@ -134,7 +134,7 @@ class HTTP::Server # # Raises `IO::Error` if the response is closed or headers were already # sent. - def respond_with_status(status : HTTP::Status, message : String? = nil) + def respond_with_status(status : HTTP::Status, message : String? = nil) : Nil check_headers reset @status = status @@ -145,7 +145,7 @@ class HTTP::Server end # :ditto: - def respond_with_status(status : Int, message : String? = nil) + def respond_with_status(status : Int, message : String? 
= nil) : Nil respond_with_status(HTTP::Status.new(status), message) end @@ -189,7 +189,7 @@ class HTTP::Server @closed = false end - def reset + def reset : Nil @in_buffer_rem = Bytes.empty @out_count = 0 @sync = false @@ -231,7 +231,7 @@ class HTTP::Server @closed end - def close + def close : Nil return if closed? # Conditionally determine based on status if the `content-length` header should be added automatically. diff --git a/src/http/web_socket.cr b/src/http/web_socket.cr index 5a79e56190ba..6516dee2bbb8 100644 --- a/src/http/web_socket.cr +++ b/src/http/web_socket.cr @@ -30,6 +30,8 @@ class HTTP::WebSocket # HTTP::WebSocket.new(URI.parse("http://websocket.example.com:8080/chat")) # Creates a new WebSocket to `websocket.example.com` on port `8080` # HTTP::WebSocket.new(URI.parse("ws://websocket.example.com/chat"), # Creates a new WebSocket to `websocket.example.com` with an Authorization header # HTTP::Headers{"Authorization" => "Bearer authtoken"}) + # HTTP::WebSocket.new( + # URI.parse("ws://user:password@websocket.example.com/chat")) # Creates a new WebSocket to `websocket.example.com` with an HTTP basic auth Authorization header # ``` def self.new(uri : URI | String, headers = HTTP::Headers.new) new(Protocol.new(uri, headers: headers)) @@ -73,7 +75,7 @@ class HTTP::WebSocket end # Sends a message payload (message) to the client. - def send(message) + def send(message) : Nil check_open @ws.send(message) end @@ -91,7 +93,7 @@ class HTTP::WebSocket # Server can send an unsolicited PONG frame which the client should not respond to. # # See `#ping`. - def pong(message = nil) + def pong(message = nil) : Nil check_open @ws.pong(message) end @@ -105,7 +107,7 @@ class HTTP::WebSocket # Sends a close frame to the client, and closes the connection. # The close frame may contain a body (message) that indicates the reason for closing. - def close(code : CloseCode | Int? = nil, message = nil) + def close(code : CloseCode | Int? 
= nil, message = nil) : Nil return if closed? @closed = true @ws.close(code, message) diff --git a/src/http/web_socket/protocol.cr b/src/http/web_socket/protocol.cr index 4c07889e0327..8aec99bfd148 100644 --- a/src/http/web_socket/protocol.cr +++ b/src/http/web_socket/protocol.cr @@ -74,7 +74,7 @@ class HTTP::WebSocket::Protocol raise "This IO is write-only" end - def flush(final = true) + def flush(final = true) : Nil @websocket.send( @buffer + (@pos % @buffer.size), @opcode, @@ -86,11 +86,11 @@ class HTTP::WebSocket::Protocol end end - def send(data : String) + def send(data : String) : Nil send(data.to_slice, Opcode::TEXT) end - def send(data : Bytes) + def send(data : Bytes) : Nil send(data, Opcode::BINARY) end @@ -100,7 +100,7 @@ class HTTP::WebSocket::Protocol stream_io.flush end - def send(data : Bytes, opcode : Opcode, flags = Flags::FINAL, flush = true) + def send(data : Bytes, opcode : Opcode, flags = Flags::FINAL, flush = true) : Nil write_header(data.size, opcode, flags) write_payload(data) @io.flush if flush @@ -227,7 +227,7 @@ class HTTP::WebSocket::Protocol end end - def pong(message = nil) + def pong(message = nil) : Nil if message send(message.to_slice, Opcode::PONG) else @@ -235,7 +235,7 @@ class HTTP::WebSocket::Protocol end end - def close(code : CloseCode? = nil, message = nil) + def close(code : CloseCode? = nil, message = nil) : Nil return if @io.closed? 
if message @@ -259,7 +259,7 @@ class HTTP::WebSocket::Protocol @io.close if @sync_close end - def close(code : Int, message = nil) + def close(code : Int, message = nil) : Nil close(CloseCode.new(code), message) end @@ -320,6 +320,9 @@ class HTTP::WebSocket::Protocol if (host = uri.hostname) && (path = uri.request_target) tls = uri.scheme.in?("https", "wss") + if (user = uri.user) && (password = uri.password) + headers["Authorization"] ||= "Basic #{Base64.strict_encode("#{user}:#{password}")}" + end return new(host, path, uri.port, tls, headers) end diff --git a/src/int.cr b/src/int.cr index 57114be4e52b..13c455f1a25f 100644 --- a/src/int.cr +++ b/src/int.cr @@ -616,41 +616,115 @@ struct Int private DIGITS_UPCASE = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" private DIGITS_BASE62 = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - def to_s(base : Int = 10, *, upcase : Bool = false) : String + # Returns a string representation of this integer. + # + # *base* specifies the radix of the returned string, and must be either 62 or + # a number between 2 and 36. By default, digits above 9 are represented by + # ASCII lowercase letters (`a` for 10, `b` for 11, etc.), but uppercase + # letters may be used if *upcase* is `true`, unless base 62 is used. In that + # case, lowercase letters are used for 10 to 35, and uppercase ones for 36 to + # 61, and *upcase* must be `false`. + # + # *precision* specifies the minimum number of digits in the returned string. + # If there are fewer digits than this number, the string is left-padded by + # zeros. If `self` and *precision* are both zero, returns an empty string. 
+ # + # ``` + # 1234.to_s # => "1234" + # 1234.to_s(2) # => "10011010010" + # 1234.to_s(16) # => "4d2" + # 1234.to_s(16, upcase: true) # => "4D2" + # 1234.to_s(36) # => "ya" + # 1234.to_s(62) # => "jU" + # 1234.to_s(precision: 2) # => "1234" + # 1234.to_s(precision: 6) # => "001234" + # ``` + def to_s(base : Int = 10, *, precision : Int = 1, upcase : Bool = false) : String raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 || base == 62 raise ArgumentError.new("upcase must be false for base 62") if upcase && base == 62 + raise ArgumentError.new("Precision must be non-negative") unless precision >= 0 - case self - when 0 + case {self, precision} + when {0, 0} + "" + when {0, 1} "0" - when 1 + when {1, 1} "1" else - internal_to_s(base, upcase) do |ptr, count| - String.new(ptr, count, count) + internal_to_s(base, precision, upcase) do |ptr, count, negative| + # reuse the `chars` buffer in `internal_to_s` if possible + if precision <= count || precision <= 128 + if precision > count + difference = precision - count + ptr -= difference + Intrinsics.memset(ptr, '0'.ord.to_u8, difference, false) + count += difference + end + + if negative + ptr -= 1 + ptr.value = '-'.ord.to_u8 + count += 1 + end + + String.new(ptr, count, count) + else + len = precision + (negative ? 1 : 0) + String.new(len) do |buffer| + if negative + buffer.value = '-'.ord.to_u8 + buffer += 1 + end + + Intrinsics.memset(buffer, '0'.ord.to_u8, precision - count, false) + ptr.copy_to(buffer + precision - count, count) + {len, len} + end + end end end end - def to_s(io : IO, base : Int = 10, *, upcase : Bool = false) : Nil + # Appends a string representation of this integer to the given *io*. + # + # *base* specifies the radix of the written string, and must be either 62 or + # a number between 2 and 36. 
By default, digits above 9 are represented by + # ASCII lowercase letters (`a` for 10, `b` for 11, etc.), but uppercase + # letters may be used if *upcase* is `true`, unless base 62 is used. In that + # case, lowercase letters are used for 10 to 35, and uppercase ones for 36 to + # 61, and *upcase* must be `false`. + # + # *precision* specifies the minimum number of digits in the written string. + # If there are fewer digits than this number, the string is left-padded by + # zeros. If `self` and *precision* are both zero, nothing is written. + def to_s(io : IO, base : Int = 10, *, precision : Int = 1, upcase : Bool = false) : Nil raise ArgumentError.new("Invalid base #{base}") unless 2 <= base <= 36 || base == 62 raise ArgumentError.new("upcase must be false for base 62") if upcase && base == 62 + raise ArgumentError.new("Precision must be non-negative") unless precision >= 0 - case self - when 0 + case {self, precision} + when {0, 0} + # do nothing + when {0, 1} io << '0' - when 1 + when {1, 1} io << '1' else - internal_to_s(base, upcase) do |ptr, count| - io.write_utf8 Slice.new(ptr, count) + internal_to_s(base, precision, upcase) do |ptr, count, negative| + io << '-' if negative + if precision > count + (precision - count).times { io << '0' } + end + io.write_string Slice.new(ptr, count) end end end - private def internal_to_s(base, upcase = false) + private def internal_to_s(base, precision, upcase = false) # Given sizeof(self) <= 128 bits, we need at most 128 bytes for a base 2 - # representation, plus one byte for the trailing 0. + # representation, plus one byte for the negative sign (possibly used by the + # string-returning overload).
chars = uninitialized UInt8[129] ptr_end = chars.to_unsafe + 128 ptr = ptr_end @@ -666,13 +740,8 @@ struct Int num = num.tdiv(base) end - if neg - ptr -= 1 - ptr.value = '-'.ord.to_u8 - end - count = (ptr_end - ptr).to_i32 - yield ptr, count + yield ptr, count, neg end # Writes this integer to the given *io* in the given *format*. diff --git a/src/io.cr b/src/io.cr index 5e941ced23db..58da0a7b6907 100644 --- a/src/io.cr +++ b/src/io.cr @@ -463,8 +463,21 @@ abstract class IO nil end - # Writes a slice of UTF-8 encoded bytes to this `IO`, using the current encoding. - def write_utf8(slice : Bytes) : Nil + # Writes the contents of *slice*, interpreted as a sequence of UTF-8 or ASCII + # characters, into this `IO`. The contents are transcoded into this `IO`'s + # current encoding. + # + # ``` + # bytes = "你".to_slice # => Bytes[228, 189, 160] + # + # io = IO::Memory.new + # io.set_encoding("GB2312") + # io.write_string(bytes) + # io.to_slice # => Bytes[196, 227] + # + # "你".encode("GB2312") # => Bytes[196, 227] + # ``` + def write_string(slice : Bytes) : Nil if encoder = encoder() encoder.write(self, slice) else @@ -474,6 +487,12 @@ abstract class IO nil end + # :ditto: + @[Deprecated("Use `#write_string` instead.")] + def write_utf8(slice : Bytes) : Nil + write_string(slice) + end + private def encoder if encoding = @encoding @encoder ||= Encoder.new(encoding) @@ -1010,10 +1029,7 @@ abstract class IO # String operations (`gets`, `gets_to_end`, `read_char`, `<<`, `print`, `puts` # `printf`) will use this encoding. def set_encoding(encoding : String, invalid : Symbol? 
= nil) : Nil - if invalid != :skip && ( - encoding.compare("UTF-8", case_insensitive: true) == 0 || - encoding.compare("UTF8", case_insensitive: true) == 0 - ) + if utf8_encoding?(encoding, invalid) @encoding = nil else @encoding = EncodingOptions.new(encoding, invalid) @@ -1030,6 +1046,13 @@ abstract class IO @encoding.try(&.name) || "UTF-8" end + private def utf8_encoding?(encoding : String, invalid : Symbol? = nil) : Bool + invalid.nil? && ( + encoding.compare("UTF-8", case_insensitive: true) == 0 || + encoding.compare("UTF8", case_insensitive: true) == 0 + ) + end + # :nodoc: def has_non_utf8_encoding? : Bool !!@encoding diff --git a/src/io/console.cr b/src/io/console.cr index 849a82af2c97..a5ea2e52530c 100644 --- a/src/io/console.cr +++ b/src/io/console.cr @@ -51,7 +51,7 @@ class IO::FileDescriptor < IO # doing line wise editing by the terminal and only sending the input to # the program on a newline. # Only call this when this IO is a TTY, such as a not redirected stdin. - def cooked! + def cooked! : Nil if LibC.tcgetattr(fd, out mode) != 0 raise IO::Error.from_errno "can't set IO#cooked!" 
end diff --git a/src/io/delimited.cr b/src/io/delimited.cr index 172362309346..3451b47f4492 100644 --- a/src/io/delimited.cr +++ b/src/io/delimited.cr @@ -111,7 +111,7 @@ class IO::Delimited < IO raise IO::Error.new "Can't write to IO::Delimited" end - def close + def close : Nil return if @closed @closed = true diff --git a/src/io/encoding.cr b/src/io/encoding.cr index 48a2023cdcd3..665b3e3fcfa1 100644 --- a/src/io/encoding.cr +++ b/src/io/encoding.cr @@ -224,17 +224,17 @@ class IO string end - def write(io) + def write(io) : Nil io.write @out_slice @out_slice = Bytes.empty end - def write(io, numbytes) + def write(io, numbytes) : Nil io.write @out_slice[0, numbytes] @out_slice += numbytes end - def advance(numbytes) + def advance(numbytes) : Nil @out_slice += numbytes end diff --git a/src/io/evented.cr b/src/io/evented.cr index 75f60b76145a..c2f12bfebff1 100644 --- a/src/io/evented.cr +++ b/src/io/evented.cr @@ -165,11 +165,11 @@ module IO::Evented event.add timeout end - def evented_reopen + def evented_reopen : Nil evented_close end - def evented_close + def evented_close : Nil @read_event.consume_each &.free @write_event.consume_each &.free diff --git a/src/io/memory.cr b/src/io/memory.cr index 1c1583255183..7d5503024557 100644 --- a/src/io/memory.cr +++ b/src/io/memory.cr @@ -242,7 +242,7 @@ class IO::Memory < IO # io = IO::Memory.new "hello" # io.clear # raises IO::Error # ``` - def clear + def clear : Nil check_open check_resizeable @bytesize = 0 @@ -372,7 +372,7 @@ class IO::Memory < IO # io.close # io.gets_to_end # raises IO::Error (closed stream) # ``` - def close + def close : Nil @closed = true end diff --git a/src/io/sized.cr b/src/io/sized.cr index ad451523fffe..d46a548da432 100644 --- a/src/io/sized.cr +++ b/src/io/sized.cr @@ -76,7 +76,7 @@ class IO::Sized < IO raise IO::Error.new "Can't write to IO::Sized" end - def close + def close : Nil return if @closed @closed = true diff --git a/src/io/stapled.cr b/src/io/stapled.cr index 
98d90eadb238..774873476fb6 100644 --- a/src/io/stapled.cr +++ b/src/io/stapled.cr @@ -65,7 +65,7 @@ class IO::Stapled < IO end # Writes a byte to `writer`. - def write_byte(byte : UInt8) + def write_byte(byte : UInt8) : Nil check_open @writer.write_byte(byte) diff --git a/src/iterator.cr b/src/iterator.cr index 179e5aac0493..4f3373c30d29 100644 --- a/src/iterator.cr +++ b/src/iterator.cr @@ -590,7 +590,7 @@ module Iterator(T) # iter = ["a", "b", "c"].each # iter.each { |x| print x, " " } # Prints "a b c" # ``` - def each(& : T -> _) : Nil + def each(& : T ->) : Nil while true value = self.next break if value.is_a?(Stop) diff --git a/src/json/builder.cr b/src/json/builder.cr index 49af4b7bdb6d..65b10fc03c10 100644 --- a/src/json/builder.cr +++ b/src/json/builder.cr @@ -28,7 +28,7 @@ class JSON::Builder end # Starts a document. - def start_document + def start_document : Nil case state = @state.last when StartState @state[-1] = DocumentStartState.new @@ -62,28 +62,28 @@ class JSON::Builder end # Writes a `null` value. - def null + def null : Nil scalar do @io << "null" end end # Writes a boolean value. - def bool(value : Bool) + def bool(value : Bool) : Nil scalar do @io << value end end # Writes an integer. - def number(number : Int) + def number(number : Int) : Nil scalar do @io << number end end # Writes a float. - def number(number : Float) + def number(number : Float) : Nil scalar do case number when .nan? @@ -100,7 +100,7 @@ class JSON::Builder # by invoking `to_s` on it. # # This method can also be used to write the name of an object field. - def string(value) + def string(value) : Nil string = value.to_s scalar(string: true) do @@ -157,14 +157,14 @@ class JSON::Builder # the IO without processing. This is the only method that # might lead to invalid JSON being generated, so you must # be sure that *string* contains a valid JSON string. - def raw(string : String) + def raw(string : String) : Nil scalar do @io << string end end # Writes the start of an array. 
- def start_array + def start_array : Nil start_scalar increase_indent @state.push ArrayState.new(empty: true) @@ -172,7 +172,7 @@ class JSON::Builder end # Writes the end of an array. - def end_array + def end_array : Nil case state = @state.last when ArrayState @state.pop @@ -193,7 +193,7 @@ class JSON::Builder end # Writes the start of an object. - def start_object + def start_object : Nil start_scalar increase_indent @state.push ObjectState.new(empty: true, name: true) @@ -201,7 +201,7 @@ class JSON::Builder end # Writes the end of an object. - def end_object + def end_object : Nil case state = @state.last when ObjectState unless state.name @@ -235,12 +235,12 @@ class JSON::Builder end # :ditto: - def scalar(value : Int | Float) + def scalar(value : Int | Float) : Nil number(value) end # :ditto: - def scalar(value : String) + def scalar(value : String) : Nil string(value) end diff --git a/src/json/pull_parser.cr b/src/json/pull_parser.cr index c8fe81538201..7258d6d958f0 100644 --- a/src/json/pull_parser.cr +++ b/src/json/pull_parser.cr @@ -103,30 +103,32 @@ class JSON::PullParser next_token case token.kind - when .null? + in .null? @kind = :null - when .false? + in .false? @kind = :bool @bool_value = false - when .true? + in .true? @kind = :bool @bool_value = true - when .int? + in .int? @kind = :int @int_value = token.int_value @raw_value = token.raw_value - when .float? + in .float? @kind = :float @float_value = token.float_value @raw_value = token.raw_value - when .string? + in .string? @kind = :string @string_value = token.string_value - when .begin_array? + in .begin_array? begin_array - when .begin_object? + in .begin_object? begin_object - else + in .eof? + @kind = :eof + in .end_array?, .end_object?, .comma?, .colon? unexpected_token end end @@ -263,7 +265,7 @@ class JSON::PullParser # Reads the new value and fill the a JSON builder with it. # # Use this method with a `JSON::Builder` to read a JSON while building another one. 
- def read_raw(json) + def read_raw(json) : Nil case @kind when .null? read_next @@ -556,7 +558,7 @@ class JSON::PullParser # # It skips the whole value, not only the next lexer's token. # For example if the next value is an array, the whole array will be skipped. - def skip + def skip : Nil @lexer.skip = true skip_internal @lexer.skip = false diff --git a/src/json/to_json.cr b/src/json/to_json.cr index 0c0bcf482367..2b3224529e84 100644 --- a/src/json/to_json.cr +++ b/src/json/to_json.cr @@ -25,7 +25,7 @@ class Object end struct Nil - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.null end @@ -35,13 +35,13 @@ struct Nil end struct Bool - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.bool(self) end end struct Int - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.number(self) end @@ -51,7 +51,7 @@ struct Int end struct Float - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.number(self) end @@ -61,7 +61,7 @@ struct Float end class String - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.string(self) end @@ -71,7 +71,7 @@ class String end struct Path - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil @name.to_json(json) end @@ -81,7 +81,7 @@ struct Path end struct Symbol - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.string(to_s) end @@ -91,7 +91,7 @@ struct Symbol end class Array - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.array do each &.to_json(json) end @@ -99,7 +99,7 @@ class Array end class Deque - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.array do each &.to_json(json) end @@ -107,7 +107,7 @@ class Deque end struct Set - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.array do each &.to_json(json) end @@ -120,7 +120,7 @@ class 
Hash # Keys are serialized by invoking `to_json_object_key` on them. # Values are serialized with the usual `to_json(json : JSON::Builder)` # method. - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.object do each do |key, value| json.field key.to_json_object_key do @@ -132,7 +132,7 @@ class Hash end struct Tuple - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.array do {% for i in 0...T.size %} self[{{i}}].to_json(json) @@ -154,7 +154,7 @@ struct NamedTuple end struct Time::Format - def to_json(value : Time, json : JSON::Builder) + def to_json(value : Time, json : JSON::Builder) : Nil format(value).to_json(json) end end @@ -263,7 +263,7 @@ struct Time # a time value. # # See `#from_json` for reference. - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.string(Time::Format::RFC_3339.format(self, fraction_digits: 0)) end end @@ -343,7 +343,7 @@ end # person.to_json # => %({"birth_date":1459859781}) # ``` module Time::EpochConverter - def self.to_json(value : Time, json : JSON::Builder) + def self.to_json(value : Time, json : JSON::Builder) : Nil json.number(value.to_unix) end end @@ -367,7 +367,7 @@ end # timestamp.to_json # => %({"value":1459860483856}) # ``` module Time::EpochMillisConverter - def self.to_json(value : Time, json : JSON::Builder) + def self.to_json(value : Time, json : JSON::Builder) : Nil json.number(value.to_unix_ms) end end @@ -394,7 +394,7 @@ end # raw.to_json # => %({"value":123456789876543212345678987654321}) # ``` module String::RawConverter - def self.to_json(value : String, json : JSON::Builder) + def self.to_json(value : String, json : JSON::Builder) : Nil json.raw(value) end end diff --git a/src/llvm/enums.cr b/src/llvm/enums.cr index 275c8dd8f8a1..a604d1d7d7ce 100644 --- a/src/llvm/enums.cr +++ b/src/llvm/enums.cr @@ -60,6 +60,7 @@ module LLVM ZExt @@kind_ids = load_llvm_kinds_from_names.as(Hash(Attribute, UInt32)) + @@typed_attrs = 
load_llvm_typed_attributes.as(Array(Attribute)) def each_kind(&block) return if value == 0 @@ -137,6 +138,18 @@ module LLVM kinds end + private def self.load_llvm_typed_attributes + typed_attrs = [] of Attribute + + unless LibLLVM::IS_LT_120 + # LLVM 12 introduced mandatory type parameters for byval and sret + typed_attrs << ByVal + typed_attrs << StructRet + end + + typed_attrs + end + def self.kind_for(member) @@kind_ids[member] end @@ -144,6 +157,11 @@ module LLVM def self.from_kind(kind) @@kind_ids.key_for(kind) end + + def self.requires_type?(kind) + member = from_kind(kind) + @@typed_attrs.includes?(member) + end end {% else %} @[Flags] diff --git a/src/llvm/ext/llvm-versions.txt b/src/llvm/ext/llvm-versions.txt index 2fdbdf9a1c98..7a8af33aa8b5 100644 --- a/src/llvm/ext/llvm-versions.txt +++ b/src/llvm/ext/llvm-versions.txt @@ -1 +1 @@ -11.1 11.0 10.0 9.0 8.0 7.1 6.0 5.0 4.0 3.9 3.8 +12.0 11.1 11.0 10.0 9.0 8.0 7.1 6.0 5.0 4.0 3.9 3.8 diff --git a/src/llvm/ext/llvm_ext.cc b/src/llvm/ext/llvm_ext.cc index 4d2b13cf4193..bd82f564a903 100644 --- a/src/llvm/ext/llvm_ext.cc +++ b/src/llvm/ext/llvm_ext.cc @@ -351,11 +351,36 @@ void LLVMMetadataReplaceAllUsesWith2( void LLVMExtSetCurrentDebugLocation( LLVMBuilderRef Bref, unsigned Line, unsigned Col, LLVMMetadataRef Scope, LLVMMetadataRef InlinedAt) { +#if LLVM_VERSION_GE(12, 0) + if (!Scope) + unwrap(Bref)->SetCurrentDebugLocation(DebugLoc()); + else + unwrap(Bref)->SetCurrentDebugLocation( + DILocation::get(unwrap(Scope)->getContext(), Line, Col, + unwrapDI(Scope), + unwrapDI(InlinedAt))); +#else unwrap(Bref)->SetCurrentDebugLocation( DebugLoc::get(Line, Col, Scope ? unwrap(Scope) : nullptr, InlinedAt ? 
unwrap(InlinedAt) : nullptr)); +#endif } +#if LLVM_VERSION_GE(3, 9) +// A backported LLVMCreateTypeAttribute for LLVM < 13 +// from https://github.com/llvm/llvm-project/blob/bb8ce25e88218be60d2a4ea9c9b0b721809eff27/llvm/lib/IR/Core.cpp#L167 +LLVMAttributeRef LLVMExtCreateTypeAttribute( + LLVMContextRef C, unsigned KindID, LLVMTypeRef Ty) { + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; +#if LLVM_VERSION_GE(12, 0) + return wrap(Attribute::get(Ctx, AttrKind, unwrap(Ty))); +#else + return wrap(Attribute::get(Ctx, AttrKind)); +#endif +} +#endif + LLVMValueRef LLVMExtBuildCmpxchg( LLVMBuilderRef B, LLVMValueRef PTR, LLVMValueRef Cmp, LLVMValueRef New, LLVMAtomicOrdering SuccessOrdering, LLVMAtomicOrdering FailureOrdering) { diff --git a/src/llvm/function.cr b/src/llvm/function.cr index 7ffa10cb688e..13d6af5ca5cd 100644 --- a/src/llvm/function.cr +++ b/src/llvm/function.cr @@ -19,12 +19,16 @@ struct LLVM::Function LibLLVM.set_function_call_convention(self, cc) end - def add_attribute(attribute : Attribute, index = AttributeIndex::FunctionIndex) + def add_attribute(attribute : Attribute, index = AttributeIndex::FunctionIndex, type : Type? 
= nil) return if attribute.value == 0 {% if LibLLVM.has_constant?(:AttributeRef) %} context = LibLLVM.get_module_context(LibLLVM.get_global_parent(self)) attribute.each_kind do |kind| - attribute_ref = LibLLVM.create_enum_attribute(context, kind, 0) + if type && LLVM::Attribute.requires_type?(kind) + attribute_ref = LibLLVMExt.create_type_attribute(context, kind, type) + else + attribute_ref = LibLLVM.create_enum_attribute(context, kind, 0) + end LibLLVM.add_attribute_at_index(self, index, attribute_ref) end {% else %} diff --git a/src/llvm/lib_llvm.cr b/src/llvm/lib_llvm.cr index 857987d0a36e..694d1767a63a 100644 --- a/src/llvm/lib_llvm.cr +++ b/src/llvm/lib_llvm.cr @@ -19,6 +19,7 @@ end {% begin %} lib LibLLVM + IS_120 = {{LibLLVM::VERSION.starts_with?("12.0")}} IS_110 = {{LibLLVM::VERSION.starts_with?("11.0")}} IS_100 = {{LibLLVM::VERSION.starts_with?("10.0")}} IS_90 = {{LibLLVM::VERSION.starts_with?("9.0")}} @@ -33,7 +34,10 @@ end IS_LT_70 = IS_38 || IS_39 || IS_40 || IS_50 || IS_60 IS_LT_80 = IS_LT_70 || IS_70 || IS_71 - IS_LT_110 = IS_LT_80 || IS_90 || IS_100 + IS_LT_90 = IS_LT_80 || IS_80 + IS_LT_100 = IS_LT_90 || IS_90 + IS_LT_110 = IS_LT_100 || IS_100 + IS_LT_120 = IS_LT_110 || IS_110 end {% end %} diff --git a/src/llvm/lib_llvm_ext.cr b/src/llvm/lib_llvm_ext.cr index a9d0dd9c7eed..c14e6f3ebc97 100644 --- a/src/llvm/lib_llvm_ext.cr +++ b/src/llvm/lib_llvm_ext.cr @@ -165,4 +165,9 @@ lib LibLLVMExt fun target_machine_enable_global_isel = LLVMExtTargetMachineEnableGlobalIsel(machine : LibLLVM::TargetMachineRef, enable : Bool) fun create_mc_jit_compiler_for_module = LLVMExtCreateMCJITCompilerForModule(jit : LibLLVM::ExecutionEngineRef*, m : LibLLVM::ModuleRef, options : LibLLVM::JITCompilerOptions*, options_length : UInt32, enable_global_isel : Bool, error : UInt8**) : Int32 + + {% unless LibLLVM::IS_38 %} + # LLVMCreateTypeAttribute is implemented in LLVM 13, but needed in 12 + fun create_type_attribute = LLVMExtCreateTypeAttribute(ctx : LibLLVM::ContextRef, 
kind_id : LibC::UInt, ty : LibLLVM::TypeRef) : LibLLVM::AttributeRef + {% end %} end diff --git a/src/llvm/value_methods.cr b/src/llvm/value_methods.cr index bab4569638ba..30a2e2bb7df1 100644 --- a/src/llvm/value_methods.cr +++ b/src/llvm/value_methods.cr @@ -14,11 +14,16 @@ module LLVM::ValueMethods LibLLVM.get_value_kind(self) end - def add_instruction_attribute(index : Int, attribute : LLVM::Attribute, context : LLVM::Context) + def add_instruction_attribute(index : Int, attribute : LLVM::Attribute, context : LLVM::Context, type : LLVM::Type? = nil) return if attribute.value == 0 {% if LibLLVM.has_constant?(:AttributeRef) %} attribute.each_kind do |kind| - attribute_ref = LibLLVM.create_enum_attribute(context, kind, 0) + if type && LLVM::Attribute.requires_type?(kind) + attribute_ref = LibLLVMExt.create_type_attribute(context, kind, type) + else + attribute_ref = LibLLVM.create_enum_attribute(context, kind, 0) + end + LibLLVM.add_call_site_attribute(self, index, attribute_ref) end {% else %} diff --git a/src/log/broadcast_backend.cr b/src/log/broadcast_backend.cr index 8a942baa374f..b6b558164020 100644 --- a/src/log/broadcast_backend.cr +++ b/src/log/broadcast_backend.cr @@ -15,7 +15,7 @@ class Log::BroadcastBackend < Log::Backend super(:direct) end - def append(backend : Log::Backend, level : Severity) + def append(backend : Log::Backend, level : Severity) : Nil @backends[backend] = level end @@ -47,7 +47,7 @@ class Log::BroadcastBackend < Log::Backend end # :nodoc: - def remove(backend : Log::Backend) + def remove(backend : Log::Backend) : Nil @backends.delete(backend) end end diff --git a/src/log/dispatch.cr b/src/log/dispatch.cr index e62e548bafb1..ba7cd1b00f81 100644 --- a/src/log/dispatch.cr +++ b/src/log/dispatch.cr @@ -51,7 +51,7 @@ class Log spawn write_logs end - def dispatch(entry : Entry, backend : Backend) + def dispatch(entry : Entry, backend : Backend) : Nil @channel.send({entry, backend}) end diff --git a/src/log/format.cr b/src/log/format.cr 
index 0baebcf26f6a..7a26699d68d7 100644 --- a/src/log/format.cr +++ b/src/log/format.cr @@ -64,17 +64,17 @@ class Log end # Write the entry timestamp in RFC3339 format - def timestamp + def timestamp : Nil @entry.timestamp.to_rfc3339(@io, fraction_digits: 6) end # Write a fixed string - def string(str) + def string(str) : Nil @io << str end # Write the message of the entry - def message + def message : Nil @io << @entry.message end @@ -107,7 +107,7 @@ class Log # It doesn't write any output if the entry data is empty. # Parameters `before` and `after` can be provided to be written around # the value. - def data(*, before = nil, after = nil) + def data(*, before = nil, after = nil) : Nil unless @entry.data.empty? @io << before << @entry.data << after end @@ -130,7 +130,7 @@ class Log # Parameters `before` and `after` can be provided to be written around # the value. `before` defaults to `'\n'` so the exception is written # on a separate line - def exception(*, before = '\n', after = nil) + def exception(*, before = '\n', after = nil) : Nil if ex = @entry.exception @io << before ex.inspect_with_backtrace(@io) @@ -139,7 +139,7 @@ class Log end # Write the program name. See `Log.progname`. - def progname + def progname : Nil @io << Log.progname end @@ -149,7 +149,7 @@ class Log end # Write the `Log::Entry` to the `IO` using this pattern - def self.format(entry, io) + def self.format(entry, io) : Nil new(entry, io).run end diff --git a/src/log/io_backend.cr b/src/log/io_backend.cr index b449444ba439..cc7e38458552 100644 --- a/src/log/io_backend.cr +++ b/src/log/io_backend.cr @@ -14,7 +14,7 @@ class Log::IOBackend < Log::Backend end {% end %} - def write(entry : Entry) + def write(entry : Entry) : Nil format(entry) io.puts io.flush @@ -22,7 +22,7 @@ class Log::IOBackend < Log::Backend # Emits the *entry* to the given *io*. # It uses the `#formatter` to convert. 
- def format(entry : Entry) + def format(entry : Entry) : Nil @formatter.format(entry, io) end end diff --git a/src/log/main.cr b/src/log/main.cr index e3ea3ba65412..bfa27b870151 100644 --- a/src/log/main.cr +++ b/src/log/main.cr @@ -120,7 +120,7 @@ class Log # Log.context.clear # Log.info { "message with empty context" } # ``` - def clear + def clear : Nil Fiber.current.logging_context = @metadata = Log::Metadata.empty end @@ -142,7 +142,7 @@ class Log end # :ditto: - def set(values) + def set(values) : Nil extend_fiber_context(Fiber.current, values) end diff --git a/src/log/memory_backend.cr b/src/log/memory_backend.cr index 483f9aae0e51..22f979fda566 100644 --- a/src/log/memory_backend.cr +++ b/src/log/memory_backend.cr @@ -7,7 +7,7 @@ class Log::MemoryBackend < Log::Backend super(:direct) end - def write(entry : Log::Entry) + def write(entry : Log::Entry) : Nil @entries << entry end end diff --git a/src/math/libm.cr b/src/math/libm.cr index 1840108f8d42..844e59f10528 100644 --- a/src/math/libm.cr +++ b/src/math/libm.cr @@ -94,6 +94,8 @@ lib LibM fun log1p_f64 = log1p(value : Float64) : Float64 fun logb_f32 = logbf(value : Float32) : Float32 fun logb_f64 = logb(value : Float64) : Float64 + fun nextafter_f32 = nextafterf(from : Float32, to : Float32) : Float32 + fun nextafter_f64 = nextafter(from : Float64, to : Float64) : Float64 fun scalbln_f32 = scalblnf(value1 : Float32, value2 : Int64) : Float32 fun scalbln_f64 = scalbln(value1 : Float64, value2 : Int64) : Float64 fun scalbn_f32 = scalbnf(value1 : Float32, value2 : Int32) : Float32 diff --git a/src/mime/multipart/builder.cr b/src/mime/multipart/builder.cr index e96d6fe121b1..416fc3aa5ded 100644 --- a/src/mime/multipart/builder.cr +++ b/src/mime/multipart/builder.cr @@ -41,7 +41,7 @@ module MIME::Multipart # if `#body_part` is called before this method. # # Can be called multiple times to append to the preamble multiple times. 
- def preamble(string : String) + def preamble(string : String) : Nil preamble { |io| string.to_s(io) } end @@ -49,7 +49,7 @@ module MIME::Multipart # if `#body_part` is called before this method. # # Can be called multiple times to append to the preamble multiple times. - def preamble(data : Bytes) + def preamble(data : Bytes) : Nil preamble { |io| io.write data } end @@ -57,7 +57,7 @@ module MIME::Multipart # Throws if `#body_part` is called before this method. # # Can be called multiple times to append to the preamble multiple times. - def preamble(preamble_io : IO) + def preamble(preamble_io : IO) : Nil preamble { |io| IO.copy(preamble_io, io) } end @@ -74,21 +74,21 @@ module MIME::Multipart # Appends a body part to the multipart message with the given *headers* # and *string*. Throws if `#finish` or `#epilogue` is called before this # method. - def body_part(headers : HTTP::Headers, string : String) + def body_part(headers : HTTP::Headers, string : String) : Nil body_part_impl(headers) { |io| string.to_s(io) } end # Appends a body part to the multipart message with the given *headers* # and *data*. Throws if `#finish` or `#epilogue` is called before this # method. - def body_part(headers : HTTP::Headers, data : Bytes) + def body_part(headers : HTTP::Headers, data : Bytes) : Nil body_part_impl(headers) { |io| io.write data } end # Appends a body part to the multipart message with the given *headers* # and data from *body_io*. Throws if `#finish` or `#epilogue` is called # before this method. - def body_part(headers : HTTP::Headers, body_io : IO) + def body_part(headers : HTTP::Headers, body_io : IO) : Nil body_part_impl(headers) { |io| IO.copy(body_io, io) } end @@ -102,7 +102,7 @@ module MIME::Multipart # Appends a body part to the multipart message with the given *headers* # and no body data. Throws is `#finish` or `#epilogue` is called before # this method. 
- def body_part(headers : HTTP::Headers) + def body_part(headers : HTTP::Headers) : Nil body_part_impl(headers, empty: true) { } end @@ -131,7 +131,7 @@ module MIME::Multipart # appended. # # Can be called multiple times to append to the epilogue multiple times. - def epilogue(string : String) + def epilogue(string : String) : Nil epilogue { |io| string.to_s(io) } end @@ -140,7 +140,7 @@ module MIME::Multipart # appended. # # Can be called multiple times to append to the epilogue multiple times. - def epilogue(data : Bytes) + def epilogue(data : Bytes) : Nil epilogue { |io| io.write data } end @@ -149,7 +149,7 @@ module MIME::Multipart # been appended. # # Can be called multiple times to append to the epilogue multiple times. - def epilogue(epilogue_io : IO) + def epilogue(epilogue_io : IO) : Nil epilogue { |io| IO.copy(epilogue_io, io) } end @@ -174,7 +174,7 @@ module MIME::Multipart # Finalizes the multipart message, this method must be called to properly # end the multipart message. - def finish + def finish : Nil fail "Cannot finish multipart: no body parts" if @state == :START || @state == :PREAMBLE fail "Cannot finish multipart: already finished" if @state == :FINISHED diff --git a/src/named_tuple.cr b/src/named_tuple.cr index 32d723e2ffd3..22a9a55e92a2 100644 --- a/src/named_tuple.cr +++ b/src/named_tuple.cr @@ -498,12 +498,18 @@ struct NamedTuple # tuple = {name: "Crystal", year: 2011} # tuple.to_a # => [{:name, "Crystal"}, {:year, 2011}] # ``` + # + # NOTE: `to_a` on an empty named tuple produces an `Array(Tuple(Symbol, NoReturn))` def to_a - ary = Array({typeof(first_key_internal), typeof(first_value_internal)}).new(size) - each do |key, value| - ary << {key.as(typeof(first_key_internal)), value.as(typeof(first_value_internal))} - end - ary + {% if T.size == 0 %} + [] of {Symbol, NoReturn} + {% else %} + [ + {% for key in T %} + { {{key.symbolize}}, self[{{key.symbolize}}] }, + {% end %} + ] + {% end %} end # Returns a `Hash` with the keys and values in 
this named tuple. @@ -512,9 +518,11 @@ struct NamedTuple # tuple = {name: "Crystal", year: 2011} # tuple.to_h # => {:name => "Crystal", :year => 2011} # ``` + # + # NOTE: `to_h` on an empty named tuple produces a `Hash(Symbol, NoReturn)` def to_h {% if T.size == 0 %} - {} of NoReturn => NoReturn + {} of Symbol => NoReturn {% else %} { {% for key in T %} diff --git a/src/oauth/params.cr b/src/oauth/params.cr index 34a50fa22b2a..9689d4ac479c 100644 --- a/src/oauth/params.cr +++ b/src/oauth/params.cr @@ -4,13 +4,13 @@ struct OAuth::Params @params = [] of {String, String} end - def add(key, value) + def add(key, value) : Nil if value @params << {URI.encode_www_form(key, space_to_plus: false), URI.encode_www_form(value, space_to_plus: false)} end end - def add_query(query) + def add_query(query) : Nil URI::Params.parse(query) do |key, value| add key, value end diff --git a/src/oauth2/access_token/bearer.cr b/src/oauth2/access_token/bearer.cr index 9e5a5c0148d3..c586dbb2cd47 100644 --- a/src/oauth2/access_token/bearer.cr +++ b/src/oauth2/access_token/bearer.cr @@ -13,7 +13,7 @@ class OAuth2::AccessToken::Bearer < OAuth2::AccessToken request.headers["Authorization"] = "Bearer #{access_token}" end - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.object do json.field "token_type", "bearer" json.field "access_token", access_token diff --git a/src/oauth2/access_token/mac.cr b/src/oauth2/access_token/mac.cr index 670b57357662..6f7a8f44cbf4 100644 --- a/src/oauth2/access_token/mac.cr +++ b/src/oauth2/access_token/mac.cr @@ -45,7 +45,7 @@ class OAuth2::AccessToken::Mac < OAuth2::AccessToken Base64.strict_encode OpenSSL::HMAC.digest(digest, mac_key, normalized_request_string) end - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.object do json.field "token_type", "mac" json.field "access_token", access_token diff --git a/src/oauth2/session.cr b/src/oauth2/session.cr index a3b56f8373c6..1ba9ec088cee 100644 --- 
a/src/oauth2/session.cr +++ b/src/oauth2/session.cr @@ -18,7 +18,7 @@ class OAuth2::Session # Authenticates an `HTTP::Client`, refreshing the access token if it is expired. # # Invoke this method on an `HTTP::Client` before executing an HTTP request. - def authenticate(http_client) + def authenticate(http_client) : Nil check_refresh_token @access_token.authenticate http_client end diff --git a/src/openssl/ssl/socket.cr b/src/openssl/ssl/socket.cr index 22cf85d24dd1..fa74d45f466d 100644 --- a/src/openssl/ssl/socket.cr +++ b/src/openssl/ssl/socket.cr @@ -151,7 +151,7 @@ abstract class OpenSSL::SSL::Socket < IO end end - def unbuffered_flush + def unbuffered_flush : Nil @bio.io.flush end diff --git a/src/option_parser.cr b/src/option_parser.cr index a7d6727f2a21..ec3f891759e3 100644 --- a/src/option_parser.cr +++ b/src/option_parser.cr @@ -220,7 +220,7 @@ class OptionParser # before, and the flags registered after the call. # # This way, you can group the different options in an easier to read way. - def separator(message = "") + def separator(message = "") : Nil @flags << message.to_s end @@ -259,7 +259,7 @@ class OptionParser # Stops the current parse and returns immediately, leaving the remaining flags # unparsed. This is treated identically to `--` being inserted *behind* the # current parsed flag. - def stop + def stop : Nil @stop = true end diff --git a/src/prelude.cr b/src/prelude.cr index f391f0386ac2..79b5f838b03b 100644 --- a/src/prelude.cr +++ b/src/prelude.cr @@ -28,6 +28,7 @@ require "number" require "annotations" require "array" require "atomic" +require "base64" require "bool" require "box" require "char" diff --git a/src/pretty_print.cr b/src/pretty_print.cr index 7dbf4be6a7cf..038da9d3eb4a 100644 --- a/src/pretty_print.cr +++ b/src/pretty_print.cr @@ -63,7 +63,7 @@ class PrettyPrint end # Appends a text element. 
- def text(obj) + def text(obj) : Nil obj = obj.to_s width = obj.size return if width == 0 @@ -84,7 +84,7 @@ class PrettyPrint end # Appends an element that can turn into a newline if necessary. - def breakable(sep = " ") + def breakable(sep = " ") : Nil width = sep.size group = @group_stack.last if group.break? @@ -102,7 +102,7 @@ class PrettyPrint # Similar to `#breakable` except # the decision to break or not is determined individually. - def fill_breakable(sep = " ") + def fill_breakable(sep = " ") : Nil group { breakable sep } end @@ -149,7 +149,7 @@ class PrettyPrint # text "," # breakable # ``` - def comma + def comma : Nil text "," breakable end @@ -185,7 +185,7 @@ class PrettyPrint end # Outputs any buffered data. - def flush + def flush : Nil @buffer.each do |data| @output_width = data.output(@output, @output_width) end @@ -249,7 +249,7 @@ class PrettyPrint @break = false end - def break + def break : Nil @break = true end end diff --git a/src/slice.cr b/src/slice.cr index e33c1e40b7ae..f4e518dbf77b 100644 --- a/src/slice.cr +++ b/src/slice.cr @@ -351,6 +351,53 @@ struct Slice(T) Slice.new(size, read_only: read_only) { |i| yield @pointer[i], offset + i } end + # Replaces every element in `self` with the given *value*. Returns `self`. + # + # ``` + # slice = Slice[1, 2, 3, 4] + # slice.fill(2) # => Slice[2, 2, 2, 2] + # slice # => Slice[2, 2, 2, 2] + # ``` + def fill(value : T) : self + check_writable + + {% if T == UInt8 %} + Intrinsics.memset(to_unsafe.as(Void*), value, size, false) + self + {% else %} + {% if Number::Primitive.union_types.includes?(T) %} + if value == 0 + to_unsafe.clear(size) + return self + end + {% end %} + + fill { value } + {% end %} + end + + # Yields each index of `self` to the given block and then assigns + # the block's value in that position. Returns `self`. + # + # Accepts an optional *offset* parameter, which tells the block to start + # counting from there. 
+ # + # ``` + # slice = Slice[2, 1, 1, 1] + # slice.fill { |i| i * i } # => Slice[0, 1, 4, 9] + # slice # => Slice[0, 1, 4, 9] + # slice.fill(offset: 3) { |i| i * i } # => Slice[9, 16, 25, 36] + # slice # => Slice[9, 16, 25, 36] + # ``` + def fill(*, offset : Int = 0, & : Int32 -> T) : self + check_writable + + size.times do |i| + to_unsafe[i] = yield offset + i + end + self + end + def copy_from(source : Pointer(T), count) check_writable check_size(count) @@ -690,8 +737,16 @@ struct Slice(T) # a.sort # => Slice[1, 2, 3] # a # => Slice[3, 1, 2] # ``` - def sort(*, stable : Bool = true) : Slice(T) - dup.sort!(stable: stable) + def sort : Slice(T) + dup.sort! + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort : Slice(T) + dup.unstable_sort! end # Returns a new slice with all elements sorted based on the comparator in the @@ -708,12 +763,24 @@ struct Slice(T) # b # => Slice[3, 2, 1] # a # => Slice[3, 1, 2] # ``` - def sort(*, stable : Bool = true, &block : T, T -> U) : Slice(T) forall U + def sort(&block : T, T -> U) : Slice(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - dup.sort!(stable: stable, &block) + dup.sort! &block + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort(&block : T, T -> U) : Slice(T) forall U + {% unless U <= Int32? %} + {% raise "expected block to return Int32 or Nil, not #{U}" %} + {% end %} + + dup.unstable_sort!(&block) end # Modifies `self` by sorting all elements based on the return value of their @@ -724,12 +791,19 @@ struct Slice(T) # a.sort! 
# a # => Slice[1, 2, 3] # ``` - def sort!(*, stable : Bool = true) : Slice(T) - if stable - Slice.merge_sort!(self) - else - Slice.intro_sort!(to_unsafe, size) - end + def sort! : Slice(T) + Slice.merge_sort!(self) + + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort! : Slice(T) + Slice.intro_sort!(to_unsafe, size) + self end @@ -746,16 +820,27 @@ struct Slice(T) # a.sort! { |a, b| b <=> a } # a # => Slice[3, 2, 1] # ``` - def sort!(*, stable : Bool = true, &block : T, T -> U) : Slice(T) forall U + def sort!(&block : T, T -> U) : Slice(T) forall U {% unless U <= Int32? %} {% raise "expected block to return Int32 or Nil, not #{U}" %} {% end %} - if stable - Slice.merge_sort!(self, block) - else - Slice.intro_sort!(to_unsafe, size, block) - end + Slice.merge_sort!(self, block) + + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort!(&block : T, T -> U) : Slice(T) forall U + {% unless U <= Int32? %} + {% raise "expected block to return Int32 or Nil, not #{U}" %} + {% end %} + + Slice.intro_sort!(to_unsafe, size, block) + self end @@ -769,8 +854,16 @@ struct Slice(T) # b # => Slice["fig", "pear", "apple"] # a # => Slice["apple", "pear", "fig"] # ``` - def sort_by(*, stable : Bool = true, &block : T -> _) : Slice(T) - dup.sort_by!(stable: stable) { |e| yield(e) } + def sort_by(&block : T -> _) : Slice(T) + dup.sort_by! { |e| yield(e) } + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort_by(&block : T -> _) : Slice(T) + dup.unstable_sort_by! { |e| yield(e) } end # Modifies `self` by sorting all elements. 
The given block is called for @@ -782,8 +875,20 @@ struct Slice(T) # a.sort_by! { |word| word.size } # a # => Slice["fig", "pear", "apple"] # ``` - def sort_by!(*, stable : Bool = true, &block : T -> _) : Slice(T) - sorted = map { |e| {e, yield(e)} }.sort!(stable: stable) { |x, y| x[1] <=> y[1] } + def sort_by!(&block : T -> _) : Slice(T) + sorted = map { |e| {e, yield(e)} }.sort! { |x, y| x[1] <=> y[1] } + size.times do |i| + to_unsafe[i] = sorted.to_unsafe[i][0] + end + self + end + + # :ditto: + # + # This method does not guarantee stability between equally sorting elements. + # Which results in a performance advantage over stable sort. + def unstable_sort_by!(&block : T -> _) : Slice(T) + sorted = map { |e| {e, yield(e)} }.unstable_sort! { |x, y| x[1] <=> y[1] } size.times do |i| to_unsafe[i] = sorted.to_unsafe[i][0] end diff --git a/src/static_array.cr b/src/static_array.cr index cec9f6d7f2c4..85b74cde53d7 100644 --- a/src/static_array.cr +++ b/src/static_array.cr @@ -168,14 +168,8 @@ struct StaticArray(T, N) # array # => StaticArray[2, 2, 2] # ``` def fill(value : T) : self - {% if Number::Primitive.union_types.includes?(T) %} - if value == 0 - to_unsafe.clear(size) - return self - end - {% end %} - - fill { value } + to_slice.fill(value) + self end # Yields each index of `self` to the given block and then assigns @@ -187,9 +181,7 @@ struct StaticArray(T, N) # array # => StaticArray[0, 1, 4, 9] # ``` def fill(& : Int32 -> T) : self - size.times do |i| - to_unsafe[i] = yield i - end + to_slice.fill { |i| yield i } self end diff --git a/src/string.cr b/src/string.cr index caefdae1243b..c2f0749da5e3 100644 --- a/src/string.cr +++ b/src/string.cr @@ -4958,7 +4958,7 @@ class String # Appends `self` to *io*. def to_s(io : IO) : Nil - io.write_utf8(to_slice) + io.write_string(to_slice) end # Returns the underlying bytes of this String. 
diff --git a/src/string/builder.cr b/src/string/builder.cr index 6719a2c3e149..1ccd8de10897 100644 --- a/src/string/builder.cr +++ b/src/string/builder.cr @@ -64,6 +64,16 @@ class String::Builder < IO nil end + def write_string(slice : Bytes) : Nil + write(slice) + end + + def set_encoding(encoding : String, invalid : Symbol? = nil) : Nil + unless utf8_encoding?(encoding, invalid) + raise "Can't change encoding of String::Builder" + end + end + def buffer : Pointer(UInt8) @buffer + String::HEADER_SIZE end diff --git a/src/string/formatter.cr b/src/string/formatter.cr index 55b32c1b22e4..44d14d67ac2b 100644 --- a/src/string/formatter.cr +++ b/src/string/formatter.cr @@ -273,7 +273,7 @@ struct String::Formatter(A) temp_buf = temp_buf(len) LibC.snprintf(temp_buf, len, format_buf, float) - @io.write_utf8 Slice.new(temp_buf, len - 1) + @io.write_string Slice.new(temp_buf, len - 1) else raise ArgumentError.new("Expected a float, not #{arg.inspect}") end @@ -323,7 +323,7 @@ struct String::Formatter(A) pad size, flags end - def char(char) + def char(char) : Nil @io << char end diff --git a/src/time.cr b/src/time.cr index fd090773bebd..d85fe3f3ab03 100644 --- a/src/time.cr +++ b/src/time.cr @@ -1113,7 +1113,7 @@ struct Time # # Number of seconds decimals can be selected with *fraction_digits*. # Values accepted are 0 (the default, no decimals), 3 (milliseconds), 6 (microseconds) or 9 (nanoseconds). - def to_rfc3339(io : IO, *, fraction_digits : Int = 0) + def to_rfc3339(io : IO, *, fraction_digits : Int = 0) : Nil Format::RFC_3339.format(to_utc, io, fraction_digits) end diff --git a/src/time/format/custom/iso_8601.cr b/src/time/format/custom/iso_8601.cr index 6d1331e2ee08..3cdaa4dd33e7 100644 --- a/src/time/format/custom/iso_8601.cr +++ b/src/time/format/custom/iso_8601.cr @@ -1,19 +1,19 @@ struct Time::Format module Pattern - def date_time_iso_8601 + def date_time_iso_8601 : Nil year_month_day_iso_8601 char? 
'T' time_iso_8601 end - def time_iso_8601 + def time_iso_8601 : Nil hour_minute_second_iso8601 time_zone_z_or_offset end end struct Parser - def year_month_day_iso_8601 + def year_month_day_iso_8601 : Nil year extended_format = char? '-' if current_char == 'W' @@ -58,7 +58,7 @@ struct Time::Format end end - def hour_minute_second_iso8601 + def hour_minute_second_iso8601 : Nil hour_24_zero_padded decimal_seconds = Time::SECONDS_PER_HOUR @@ -106,7 +106,7 @@ struct Time::Format end struct Formatter - def year_month_day_iso_8601 + def year_month_day_iso_8601 : Nil year_month_day end diff --git a/src/time/format/formatter.cr b/src/time/format/formatter.cr index 1abe28655104..3eafed806857 100644 --- a/src/time/format/formatter.cr +++ b/src/time/format/formatter.cr @@ -11,143 +11,143 @@ struct Time::Format def initialize(@time : Time, @io : IO) end - def year + def year : Nil pad4(time.year, '0') end - def year_modulo_100 + def year_modulo_100 : Nil pad2(time.year % 100, '0') end - def year_divided_by_100 + def year_divided_by_100 : Nil io << time.year // 100 end - def full_or_short_year + def full_or_short_year : Nil year end - def calendar_week_year + def calendar_week_year : Nil pad4(time.calendar_week[0], '0') end - def calendar_week_year_modulo100 + def calendar_week_year_modulo100 : Nil pad2(time.calendar_week[0] % 100, '0') end - def month + def month : Nil io << time.month end - def month_zero_padded + def month_zero_padded : Nil pad2 time.month, '0' end - def month_blank_padded + def month_blank_padded : Nil pad2 time.month, ' ' end - def month_name + def month_name : Nil io << get_month_name end - def month_name_upcase + def month_name_upcase : Nil io << get_month_name.upcase end - def short_month_name + def short_month_name : Nil io << get_short_month_name end - def short_month_name_upcase + def short_month_name_upcase : Nil io << get_short_month_name.upcase end - def calendar_week_week + def calendar_week_week : Nil pad2(time.calendar_week[1], '0') end - def 
day_of_month + def day_of_month : Nil io << time.day end - def day_of_month_zero_padded + def day_of_month_zero_padded : Nil pad2 time.day, '0' end - def day_of_month_blank_padded + def day_of_month_blank_padded : Nil pad2 time.day, ' ' end - def day_name + def day_name : Nil io << get_day_name end - def day_name_upcase + def day_name_upcase : Nil io << get_day_name.upcase end - def short_day_name + def short_day_name : Nil io << get_short_day_name end - def short_day_name_upcase + def short_day_name_upcase : Nil io << get_short_day_name.upcase end - def short_day_name_with_comma? + def short_day_name_with_comma? : Nil short_day_name char ',' whitespace end - def day_of_year_zero_padded + def day_of_year_zero_padded : Nil pad3 time.day_of_year, '0' end - def hour_24_zero_padded + def hour_24_zero_padded : Nil pad2 time.hour, '0' end - def hour_24_blank_padded + def hour_24_blank_padded : Nil pad2 time.hour, ' ' end - def hour_12_zero_padded + def hour_12_zero_padded : Nil h = (time.hour % 12) pad2 (h == 0 ? 12 : h), '0' end - def hour_12_blank_padded + def hour_12_blank_padded : Nil h = (time.hour % 12) pad2 (h == 0 ? 12 : h), ' ' end - def minute + def minute : Nil pad2 time.minute, '0' end - def second + def second : Nil pad2 time.second, '0' end - def milliseconds + def milliseconds : Nil pad3 time.millisecond, '0' end - def microseconds + def microseconds : Nil pad6 time.nanosecond // 1000, '0' end - def nanoseconds + def nanoseconds : Nil pad9 time.nanosecond, '0' end - def second_fraction + def second_fraction : Nil nanoseconds end - def second_fraction?(fraction_digits : Int = 9) + def second_fraction?(fraction_digits : Int = 9) : Nil case fraction_digits when 0 when 3 then char '.'; milliseconds @@ -158,31 +158,31 @@ struct Time::Format end end - def am_pm + def am_pm : Nil io << (time.hour < 12 ? "am" : "pm") end - def am_pm_upcase + def am_pm_upcase : Nil io << (time.hour < 12 ? 
"AM" : "PM") end - def day_of_week_monday_1_7 + def day_of_week_monday_1_7 : Nil io << time.day_of_week.value end - def day_of_week_sunday_0_6 + def day_of_week_sunday_0_6 : Nil io << time.day_of_week.value % 7 end - def unix_seconds + def unix_seconds : Nil io << time.to_unix end - def time_zone(with_seconds = false) + def time_zone(with_seconds = false) : Nil time_zone_offset(format_seconds: with_seconds) end - def time_zone_z_or_offset(**options) + def time_zone_z_or_offset(**options) : Nil if time.utc? io << 'Z' else @@ -194,23 +194,23 @@ struct Time::Format time.zone.format(io, with_colon: force_colon, with_seconds: format_seconds) end - def time_zone_colon(with_seconds = false) + def time_zone_colon(with_seconds = false) : Nil time_zone_offset(force_colon: true, format_seconds: with_seconds) end - def time_zone_colon_with_seconds + def time_zone_colon_with_seconds : Nil time_zone_colon(with_seconds: true) end - def time_zone_gmt + def time_zone_gmt : Nil io << "GMT" end - def time_zone_rfc2822 + def time_zone_rfc2822 : Nil time_zone_offset(allow_colon: false) end - def time_zone_gmt_or_rfc2822(**options) + def time_zone_gmt_or_rfc2822(**options) : Nil if time.utc? 
|| time.location.name == "UT" || time.location.name == "GMT" time_zone_gmt else @@ -218,7 +218,7 @@ struct Time::Format end end - def time_zone_name(zone = false) + def time_zone_name(zone = false) : Nil if zone io << time.zone.name else @@ -226,7 +226,7 @@ struct Time::Format end end - def char(char, *alternatives) + def char(char, *alternatives) : Nil io << char end @@ -234,7 +234,7 @@ struct Time::Format char(char, *alternatives) end - def whitespace + def whitespace : Nil io << ' ' end @@ -254,28 +254,28 @@ struct Time::Format get_day_name[0, 3] end - def pad2(value, padding) + def pad2(value, padding) : Nil io.write_byte padding.ord.to_u8 if value < 10 io << value end - def pad3(value, padding) + def pad3(value, padding) : Nil io.write_byte padding.ord.to_u8 if value < 100 pad2 value, padding end - def pad4(value, padding) + def pad4(value, padding) : Nil io.write_byte padding.ord.to_u8 if value < 1000 pad3 value, padding end - def pad6(value, padding) + def pad6(value, padding) : Nil io.write_byte padding.ord.to_u8 if value < 100000 io.write_byte padding.ord.to_u8 if value < 10000 pad4 value, padding end - def pad9(value, padding) + def pad9(value, padding) : Nil io.write_byte padding.ord.to_u8 if value < 100000000 io.write_byte padding.ord.to_u8 if value < 10000000 io.write_byte padding.ord.to_u8 if value < 1000000 diff --git a/src/tuple.cr b/src/tuple.cr index adff9a4d3827..ddf4e92e00e5 100644 --- a/src/tuple.cr +++ b/src/tuple.cr @@ -530,6 +530,34 @@ struct Tuple nil end + # :inherit: + def reduce + {% if T.empty? %} + raise Enumerable::EmptyError.new + {% else %} + memo = self[0] + {% for i in 1...T.size %} + memo = yield memo, self[{{ i }}] + {% end %} + memo + {% end %} + end + + # :inherit: + def reduce(memo) + {% for i in 0...T.size %} + memo = yield memo, self[{{ i }}] + {% end %} + memo + end + + # :inherit: + def reduce? + {% unless T.empty? %} + reduce { |memo, elem| yield memo, elem } + {% end %} + end + # Returns the first element of this tuple. 
Doesn't compile # if the tuple is empty. # diff --git a/src/uuid/json.cr b/src/uuid/json.cr index d9b56f7258ca..040668f4a6b8 100644 --- a/src/uuid/json.cr +++ b/src/uuid/json.cr @@ -32,7 +32,7 @@ struct UUID # uuid = UUID.new("87b3042b-9b9a-41b7-8b15-a93d3f17025e") # uuid.to_json # => "\"87b3042b-9b9a-41b7-8b15-a93d3f17025e\"" # ``` - def to_json(json : JSON::Builder) + def to_json(json : JSON::Builder) : Nil json.string(to_s) end diff --git a/src/xml/builder.cr b/src/xml/builder.cr index 742054bbccdf..e50bba52ca66 100644 --- a/src/xml/builder.cr +++ b/src/xml/builder.cr @@ -247,7 +247,7 @@ class XML::Builder # Forces content written to this writer to be flushed to # this writer's `IO`. - def flush + def flush : Nil call Flush @io.flush diff --git a/src/xml/node.cr b/src/xml/node.cr index cac2abac2bdb..c3fa4d0dd26a 100644 --- a/src/xml/node.cr +++ b/src/xml/node.cr @@ -294,6 +294,19 @@ class XML::Node end end + # Returns namespaces defined on self element directly. + def namespace_definitions : Array(Namespace) + namespaces = [] of Namespace + + ns = @node.value.ns_def + while ns + namespaces << Namespace.new(document, ns) + ns = ns.value.next + end + + namespaces + end + # Returns namespaces in scope for self – those defined on self element # directly or any ancestor node – as an `Array` of `XML::Namespace` objects. 
# @@ -304,13 +317,8 @@ class XML::Node def namespace_scopes : Array(Namespace) scopes = [] of Namespace - ns_list = LibXML.xmlGetNsList(@node.value.doc, @node) - - if ns_list - while ns_list.value - scopes << Namespace.new(document, ns_list.value) - ns_list += 1 - end + each_namespace do |namespace| + scopes << namespace end scopes diff --git a/src/yaml/any.cr b/src/yaml/any.cr index 880f0c75c330..71b6b2e2e379 100644 --- a/src/yaml/any.cr +++ b/src/yaml/any.cr @@ -312,7 +312,7 @@ struct YAML::Any raw.to_yaml(io) end - def to_json(builder : JSON::Builder) + def to_json(builder : JSON::Builder) : Nil if (raw = self.raw).is_a?(Slice) raise "Can't serialize #{raw.class} to JSON" else diff --git a/src/yaml/builder.cr b/src/yaml/builder.cr index 1e96dd5142a8..73ec669c25cf 100644 --- a/src/yaml/builder.cr +++ b/src/yaml/builder.cr @@ -61,7 +61,7 @@ class YAML::Builder end # Ends a YAML stream. - def end_stream + def end_stream : Nil emit stream_end flush end @@ -96,14 +96,14 @@ class YAML::Builder end # Starts a sequence. - def start_sequence(anchor : String? = nil, tag : String? = nil, style : YAML::SequenceStyle = YAML::SequenceStyle::ANY) + def start_sequence(anchor : String? = nil, tag : String? = nil, style : YAML::SequenceStyle = YAML::SequenceStyle::ANY) : Nil implicit = tag ? 0 : 1 emit sequence_start, get_anchor(anchor), string_to_unsafe(tag), implicit, style increase_nesting end # Ends a sequence. - def end_sequence + def end_sequence : Nil emit sequence_end decrease_nesting end @@ -115,14 +115,14 @@ class YAML::Builder end # Starts a mapping. - def start_mapping(anchor : String? = nil, tag : String? = nil, style : YAML::MappingStyle = YAML::MappingStyle::ANY) + def start_mapping(anchor : String? = nil, tag : String? = nil, style : YAML::MappingStyle = YAML::MappingStyle::ANY) : Nil implicit = tag ? 0 : 1 emit mapping_start, get_anchor(anchor), string_to_unsafe(tag), implicit, style increase_nesting end # Ends a mapping. 
- def end_mapping + def end_mapping : Nil emit mapping_end decrease_nesting end @@ -185,7 +185,7 @@ class YAML::Builder end # Closes the builder, freeing up resources. - def close + def close : Nil finalize @closed = true end diff --git a/src/yaml/nodes/parser.cr b/src/yaml/nodes/parser.cr index 20fd0aed550b..d63cf093196c 100644 --- a/src/yaml/nodes/parser.cr +++ b/src/yaml/nodes/parser.cr @@ -46,7 +46,7 @@ class YAML::Nodes::Parser < YAML::Parser node.start_column = @pull_parser.start_column.to_i end - def end_value(node) + def end_value(node) : Nil node.end_line = @pull_parser.end_line.to_i node.end_column = @pull_parser.end_column.to_i end @@ -72,15 +72,15 @@ class YAML::Nodes::Parser < YAML::Parser documents << document end - def add_to_document(document, node) + def add_to_document(document, node) : Nil document << node end - def add_to_sequence(sequence, node) + def add_to_sequence(sequence, node) : Nil sequence << node end - def add_to_mapping(mapping, key, value) + def add_to_mapping(mapping, key, value) : Nil mapping[key] = value end end diff --git a/src/yaml/parser.cr b/src/yaml/parser.cr index 547ff56a72e8..5b27cee2a3b9 100644 --- a/src/yaml/parser.cr +++ b/src/yaml/parser.cr @@ -157,7 +157,7 @@ abstract class YAML::Parser end # Closes this parser, freeing up resources. 
- def close + def close : Nil @pull_parser.close end diff --git a/src/yaml/pull_parser.cr b/src/yaml/pull_parser.cr index f24a8a5182a8..1cdfa8131d92 100644 --- a/src/yaml/pull_parser.cr +++ b/src/yaml/pull_parser.cr @@ -260,24 +260,24 @@ class YAML::PullParser # Note: YAML starts counting from 0, we want to count from 1 - def location + def location : {Int32, Int32} {start_line, start_column} end - def start_line : Int - @event.start_mark.line + 1 + def start_line : Int32 + @event.start_mark.line.to_i32 + 1 end - def start_column : Int - @event.start_mark.column + 1 + def start_column : Int32 + @event.start_mark.column.to_i32 + 1 end - def end_line : Int - @event.end_mark.line + 1 + def end_line : Int32 + @event.end_mark.line.to_i32 + 1 end - def end_column : Int - @event.end_mark.column + 1 + def end_column : Int32 + @event.end_mark.column.to_i32 + 1 end private def problem_line_number @@ -321,7 +321,7 @@ class YAML::PullParser LibYAML.yaml_event_delete(pointerof(@event)) end - def close + def close : Nil finalize @closed = true end diff --git a/src/yaml/schema/core/parser.cr b/src/yaml/schema/core/parser.cr index 0b4402b853a7..bc17ef63856d 100644 --- a/src/yaml/schema/core/parser.cr +++ b/src/yaml/schema/core/parser.cr @@ -36,15 +36,15 @@ class YAML::Schema::Core::Parser < YAML::Parser Any.new(Core.parse_scalar(@pull_parser)) end - def add_to_documents(documents, document) + def add_to_documents(documents, document) : Nil documents << document end - def add_to_document(document, node) + def add_to_document(document, node) : Nil document.as_a << node end - def add_to_sequence(sequence, node) + def add_to_sequence(sequence, node) : Nil sequence.as_a << node end diff --git a/src/yaml/schema/fail_safe.cr b/src/yaml/schema/fail_safe.cr index f309537f67b8..2518f104c447 100644 --- a/src/yaml/schema/fail_safe.cr +++ b/src/yaml/schema/fail_safe.cr @@ -50,19 +50,19 @@ module YAML::Schema::FailSafe Any.new(@pull_parser.value) end - def add_to_documents(documents, document) 
+ def add_to_documents(documents, document) : Nil documents << document end - def add_to_document(document, node) + def add_to_document(document, node) : Nil document.as_a << node end - def add_to_sequence(sequence, node) + def add_to_sequence(sequence, node) : Nil sequence.as_a << node end - def add_to_mapping(mapping, key, value) + def add_to_mapping(mapping, key, value) : Nil mapping.as_h[key] = value end end