Skip to content

Commit

Permalink
Merge branch 'issue-253-ruby-unicode' of https://github.com/kurtmckee/pygments into kurtmckee-issue-253-ruby-unicode
Browse files: browse the repository at this point in the history
  • Loading branch information
Anteru committed Feb 14, 2021
2 parents 87bc368 + 221bf88 commit a2579d3
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 3 deletions.
10 changes: 7 additions & 3 deletions pygments/lexers/ruby.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,13 @@ def intp_string_callback(self, match, ctx):
],
'funcname': [
(r'\(', Punctuation, 'defexpr'),
(r'(?:([a-zA-Z_]\w*)(\.))?'
r'([a-zA-Z_]\w*[!?]?|\*\*?|[-+]@?|'
r'[/%&|^`~]|\[\]=?|<<|>>|<=?>|>=?|===?)',
(r'(?:([a-zA-Z_]\w*)(\.))?' # optional scope name, like "self."
r'('
r'[a-zA-Z\u0080-\uffff][a-zA-Z0-9_\u0080-\uffff]*[!?=]?' # method name
r'|!=|!~|=~|\*\*?|[-+!~]@?|[/%&|^]|<=>|<[<=]?|>[>=]?|===?' # or operator override
r'|\[\]=?' # or element reference/assignment override
r'|`' # or the undocumented backtick override
r')',
bygroups(Name.Class, Operator, Name.Function), '#pop'),
default('#pop')
],
Expand Down
185 changes: 185 additions & 0 deletions tests/test_ruby.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
# -*- coding: utf-8 -*-
"""
Basic RubyLexer Test
~~~~~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

import pytest

from pygments.token import Name, Number, Operator, Text, Token
from pygments.lexers.ruby import RubyLexer


@pytest.fixture(scope='module')
def lexer():
    """Provide one RubyLexer instance shared by every test in this module."""
    ruby_lexer = RubyLexer()
    yield ruby_lexer


def test_range_syntax1(lexer):
    """Check the token stream for a two-dot range written without spaces."""
    source = u'1..3\n'
    actual = list(lexer.get_tokens(source))
    assert actual == [
        (Number.Integer, u'1'),
        (Operator, u'..'),
        (Number.Integer, u'3'),
        (Text, u'\n'),
    ]


def test_range_syntax2(lexer):
    """Check the token stream for a three-dot range written without spaces."""
    source = u'1...3\n'
    actual = list(lexer.get_tokens(source))
    assert actual == [
        (Number.Integer, u'1'),
        (Operator, u'...'),
        (Number.Integer, u'3'),
        (Text, u'\n'),
    ]


def test_range_syntax3(lexer):
    """Check the token stream for a two-dot range surrounded by spaces."""
    source = u'1 .. 3\n'
    actual = list(lexer.get_tokens(source))
    assert actual == [
        (Number.Integer, u'1'),
        (Text, u' '),
        (Operator, u'..'),
        (Text, u' '),
        (Number.Integer, u'3'),
        (Text, u'\n'),
    ]


def test_interpolation_nested_curly(lexer):
    """Check the token stream for an interpolated string containing nested
    curly-brace blocks and a nested interpolated string.
    """
    # Short local aliases keep the expected token table readable.
    dquote = Token.Literal.String.Double
    interpol = Token.Literal.String.Interpol
    integer = Token.Literal.Number.Integer
    punct = Token.Punctuation
    keyword = Token.Keyword
    op = Token.Operator
    name = Token.Name
    text = Token.Text

    source = (
        u'"A#{ (3..5).group_by { |x| x/2}.map '
        u'do |k,v| "#{k}" end.join }" + "Z"\n')

    expected = [
        (dquote, u'"'),
        (dquote, u'A'),
        (interpol, u'#{'),
        (text, u' '),
        (punct, u'('),
        (integer, u'3'),
        (op, u'..'),
        (integer, u'5'),
        (punct, u')'),
        (op, u'.'),
        (name, u'group_by'),
        (text, u' '),
        (interpol, u'{'),
        (text, u' '),
        (op, u'|'),
        (name, u'x'),
        (op, u'|'),
        (text, u' '),
        (name, u'x'),
        (op, u'/'),
        (integer, u'2'),
        (interpol, u'}'),
        (op, u'.'),
        (name, u'map'),
        (text, u' '),
        (keyword, u'do'),
        (text, u' '),
        (op, u'|'),
        (name, u'k'),
        (punct, u','),
        (name, u'v'),
        (op, u'|'),
        (text, u' '),
        (dquote, u'"'),
        (interpol, u'#{'),
        (name, u'k'),
        (interpol, u'}'),
        (dquote, u'"'),
        (text, u' '),
        (keyword, u'end'),
        (op, u'.'),
        (name, u'join'),
        (text, u' '),
        (interpol, u'}'),
        (dquote, u'"'),
        (text, u' '),
        (op, u'+'),
        (text, u' '),
        (dquote, u'"'),
        (dquote, u'Z'),
        (dquote, u'"'),
        (text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected


def test_operator_methods(lexer):
    """Check that ``==`` after a dot is lexed as Name.Operator."""
    source = u'x.==4\n'
    actual = list(lexer.get_tokens(source))
    assert actual == [
        (Token.Name, u'x'),
        (Token.Operator, u'.'),
        (Token.Name.Operator, u'=='),
        (Token.Literal.Number.Integer, u'4'),
        (Token.Text, u'\n'),
    ]


def test_escaped_bracestring(lexer):
    """Check the token stream for a ``%r{...}`` regex literal whose body is
    two escaped backslashes.
    """
    regex = Token.Literal.String.Regex
    dquote = Token.Literal.String.Double
    punct = Token.Punctuation

    source = u'str.gsub(%r{\\\\\\\\}, "/")\n'
    expected = [
        (Token.Name, u'str'),
        (Token.Operator, u'.'),
        (Token.Name, u'gsub'),
        (punct, u'('),
        (regex, u'%r{'),
        (regex, u'\\\\'),
        (regex, u'\\\\'),
        (regex, u'}'),
        (punct, u','),
        (Token.Text, u' '),
        (dquote, u'"'),
        (dquote, u'/'),
        (dquote, u'"'),
        (punct, u')'),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected


@pytest.mark.parametrize(
    'method_name',
    (
        # Bare, un-scoped method names
        'a', 'A', 'z', 'Z', 'は', '\u0080', '\uffff',
        'aは0_', 'はA__9', '\u0080はa0_', '\uffff__99Z',
        # Method names with trailing characters
        'aは!', 'はz?', 'はa=',
        # Scoped method names
        'self.a', 'String.は_', 'example.AZ09_!',
        # Operator overrides
        '+', '+@', '-', '-@', '!', '!@', '~', '~@',
        '*', '**', '/', '%', '&', '^', '`',
        '<=>', '<', '<<', '<=', '>', '>>', '>=',
        '==', '!=', '===', '=~', '!~',
        '[]', '[]=',
    )
)
def test_positive_method_names(lexer, method_name):
    """Check that each name following ``def`` is lexed as Name.Function.

    For scoped names such as ``self.a`` only the part after the last dot
    is expected to be the function token.
    """
    source = 'def ' + method_name
    # Everything after the final '.', or the whole name if there is no dot.
    _, _, bare_name = method_name.rpartition('.')

    produced = list(lexer.get_tokens(source))
    # The second-to-last token is the one compared against the method name.
    assert produced[-2] == (Name.Function, bare_name)


@pytest.mark.parametrize(
    'method_name',
    (
        '1', '_', '<>', '<<=', '>>=',
        '&&', '||', '==?', '==!', '====',
    ),
)
def test_negative_method_names(lexer, method_name):
    """Check that each invalid name following ``def`` is not lexed as a
    single Name.Function token.
    """
    source = 'def ' + method_name
    produced = list(lexer.get_tokens(source))
    # The second-to-last token must not be the whole name as a function.
    assert produced[-2] != (Name.Function, method_name)

0 comments on commit a2579d3

Please sign in to comment.