-
Notifications
You must be signed in to change notification settings - Fork 743
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a lexer for Cython.
- Loading branch information
Showing
4 changed files
with
298 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
cdef extern from 'foo.h': | ||
int foo_int | ||
struct foo_struct: | ||
pass | ||
|
||
ctypedef int word |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
module Rouge
  module Lexers
    load_lexer 'python.rb'

    # Lexer for Cython and Pyrex source files.
    #
    # Builds on the Python lexer: every Python state is inherited, and the
    # extra rules below are prepended so they are tried before the inherited
    # ones.
    class Cython < Python
      title "Cython"
      desc "Cython and Pyrex source code (cython.org)"
      tag 'cython'
      aliases 'pyx', 'pyrex'
      filenames '*.pyx', '*.pxd', '*.pxi'
      mimetypes 'text/x-cython', 'application/x-cython'

      # @param opts [Hash] lexer options, passed through unchanged to the
      #   parent lexer.
      def initialize(opts = {})
        super opts
        # Leading whitespace remembered by the :c_indent state; nil while no
        # indentation has been recorded.
        @indentation = nil
      end

      # @return [Array<String>] the parent lexer's keywords plus the
      #   Cython-specific ones.
      def self.keywords
        @keywords ||= super + %w(
          by except? fused gil nogil
        )
      end

      # @return [Array<String>] modifiers that may appear in a C definition
      #   before the type itself.
      def self.c_keywords
        @c_keywords ||= %w(
          public readonly extern api inline enum union
        )
      end

      identifier = /[a-z_]\w*/i
      dotted_identifier = /[a-z_.][\w.]*/i

      prepend :root do
        # The trailing \b stops identifiers that merely begin with these
        # letters (e.g. `cdefault`) from being split into a keyword and a
        # tail, which would also wrongly enter the C-definition states.
        rule %r/(?:cp?def|ctypedef)\b/ do
          token Keyword
          push :c_definitions
          push :c_start
        end

        rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(cimport)/ do
          groups Keyword::Namespace,
                 Text,
                 Name::Namespace,
                 Text,
                 Keyword::Namespace
        end

        rule %r/(cimport)(\s+)(#{dotted_identifier})/ do
          groups Keyword::Namespace, Text, Name::Namespace
        end

        rule %r/(struct)((?:\\\s|\s)+)/ do
          groups Keyword, Text
          push :classname
        end

        mixin :func_call_fix

        # An opening parenthesis or a comma may be followed by a typed
        # parameter, so look for a type again.
        rule %r/[(,]/, Punctuation, :c_start
      end

      prepend :classname do
        rule %r/(?:\\\s|\s)+/, Text
      end

      prepend :funcname do
        rule %r/(?:\\\s|\s)+/, Text
      end

      # This is a fix for the way that function calls are lexed in the Python
      # lexer. This should be moved to the Python lexer once confirmed that it
      # does not cause any regressions.
      state :func_call_fix do
        # An identifier immediately followed by "(" is classified by name.
        rule %r/#{identifier}(?=\()/ do |m|
          name = m[0]

          if self.class.keywords.include? name
            token Keyword
          elsif self.class.exceptions.include?(name) || self.class.builtins.include?(name)
            token Name::Builtin
          elsif self.class.builtins_pseudo.include? name
            token Name::Builtin::Pseudo
          else
            token Name::Function
          end
        end
      end

      # The Cython lexer adds three states to those already in the Python lexer.
      # Calls to `cdef`, `cpdef` and `ctypedef` move the lexer into the :c_start
      # state. The primary purpose of this state is to highlight datatypes. Once
      # this has been done, the lexer moves to the :c_definitions state where
      # the majority of text in a definition is lexed. Finally, newlines cause
      # the lexer to move to :c_indent. This state is used to check whether we
      # have moved out of a C block.

      state :c_start do
        rule %r/[^\S\n]+/, Text

        # Word boundaries keep names such as `signed_value` from being cut
        # into a keyword followed by the rest of the identifier.
        rule %r/(?:cp?def|ctypedef)\b/, Keyword

        rule %r/(?:un)?signed\b/, Keyword::Type

        # This rule matches identifiers that could be type declarations. The
        # lookahead matches (1) pointers, (2) arrays and (3) variable names.
        rule %r/#{identifier}(?=(?:\*+)|(?:[ \t]*\[)|(?:[ \t]+\w))/ do |m|
          if self.class.keywords.include? m[0]
            token Keyword
            pop!
          elsif m[0] == 'def'
            token Keyword
            goto :funcname
          elsif %w(struct class).include? m[0]
            token Keyword::Reserved
            goto :classname
          elsif self.class.c_keywords.include? m[0]
            # Stay in :c_start: a modifier may be followed by the type.
            token Keyword::Reserved
          else
            token Keyword::Type
            pop!
          end
        end

        # Nothing type-like here; leave the state without consuming input.
        rule(//) { pop! }
      end

      state :c_definitions do
        rule %r/\n/, Text, :c_indent
        mixin :root
      end

      state :c_indent do
        rule %r/[ \t]+/ do |m|
          token Text
          goto :c_start

          if @indentation.nil?
            # First indented line seen: remember its leading whitespace.
            @indentation = m[0]
          elsif @indentation.length > m[0].length
            # Shallower than the remembered indentation.
            @indentation = nil
            pop! 2 # Pop :c_start and :c_definitions
          end
        end

        # No leading whitespace at all: forget the indentation and reset
        # the state stack.
        rule(//) { @indentation = nil; reset_stack }
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
describe Rouge::Lexers::Cython do
  # Lexer instance under test.
  let(:subject) { Rouge::Lexers::Cython.new }

  describe 'guessing' do
    include Support::Guessing

    # Each registered filename pattern should select this lexer.
    it 'guesses by filename' do
      %w(foo.pyx foo.pxd foo.pxi).each do |sample_name|
        assert_guess :filename => sample_name
      end
    end

    # Each registered MIME type should select this lexer.
    it 'guesses by mimetype' do
      %w(text/x-cython application/x-cython).each do |mime|
        assert_guess :mimetype => mime
      end
    end
  end
end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
# ------------- | ||
# Cython Syntax | ||
# ------------- | ||
|
||
# Import statements | ||
import numpy | ||
from numpy cimport abs | ||
|
||
# Import statements with selection | ||
import numpy as np | ||
from cython.view cimport array as cvarray | ||
|
||
# Extern values | ||
cdef extern from 'foo.h': | ||
int foo_int | ||
struct foo_struct: | ||
pass | ||
|
||
# Type definitions | ||
ctypedef int word | ||
|
||
# C struct | ||
cdef struct Foo: | ||
pass | ||
|
||
# C class | ||
cdef class MyType(ParentType): | ||
cdef int field | ||
cpdef foo(self, x=*, int k=*) | ||
|
||
# C class with classmethod | ||
cdef class Blah: | ||
def some_method(self): | ||
print self | ||
some_method = classmethod(some_method) | ||
a = 2*3 | ||
print "hi", a | ||
|
||
# C function | ||
cpdef fib(int n): # Cython version | ||
"""Print a Fibonacci series up to n.""" | ||
cdef unsigned int a = 0 | ||
cdef int b | ||
|
||
b = 1 | ||
|
||
while a < n: | ||
print(a) | ||
a, b = b, a+b | ||
|
||
# C function with return type | ||
cpdef unsigned int foo(): | ||
return 1 + 2 | ||
|
||
# C function with no GIL | ||
cpdef int sum3d(int[:, :, :] arr) nogil: | ||
cdef size_t i, j, k, I, J, K | ||
cdef int total = 0 | ||
cdef: | ||
int I, J, K | ||
I = arr.shape[0] | ||
J = arr.shape[1] | ||
K = arr.shape[2] | ||
for i in range(I): | ||
for j in range(J): | ||
for k in range(K): | ||
total += arr[i, j, k] | ||
return total | ||
|
||
# Inline C function | ||
cdef inline int something_fast(int a, int b): | ||
return a*a + b | ||
|
||
# Python function with typed parameters | ||
def foo(int n):
    return n + 1
|
||
# Assignment on declaration | ||
cdef int n = python_call(foo(x,y), a + b + c) - 32 | ||
cdef int [:, :, :] narr_view = narr | ||
|
||
# Definition with C-block syntax | ||
cdef: | ||
int carr[3][3][3] | ||
int [:, :, :] carr_view = carr | ||
|
||
# Iteration with steps | ||
for i from 0 <= i < 10 by 2: | ||
print i | ||
|
||
# ------------- | ||
# Python syntax | ||
# ------------- | ||
|
||
# Function call | ||
print("NumPy sum of the NumPy array before assignments: %s" % narr.sum()) | ||
|
||
# Assignment | ||
narr = np.arange(27, dtype=np.dtype("i")).reshape((3, 3, 3)) | ||
carr_view[...] = narr_view | ||
cyarr_view[:] = narr_view | ||
narr_view[:, :, :] = 3 | ||
carr_view[0, 0, 0] = 100 | ||
|
||
# Equality test | ||
1 + 1 == 2 | ||
1 < 2 | ||
|
||
# For loops | ||
for i in range(i):
    total = total + 1
|
||
# Function definition | ||
def fib(n): # Python version | ||
"""Print a Fibonacci series up to n.""" | ||
a, b = 0, 1 | ||
while a < n: | ||
print a, | ||
a, b = b, a+b |