Skip to content

Commit

Permalink
Add Cython lexer (#1287)
Browse files Browse the repository at this point in the history
Add a lexer for Cython.
  • Loading branch information
mwaddoups authored and pyrmont committed Aug 4, 2019
1 parent 72988bf commit d4e52dd
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lib/rouge/demos/cython
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Declarations inside a `cdef extern` block, followed by a `ctypedef` alias.
cdef extern from 'foo.h':
int foo_int
struct foo_struct:
pass

ctypedef int word
151 changes: 151 additions & 0 deletions lib/rouge/lexers/cython.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    load_lexer 'python.rb'

    # Lexer for Cython and Pyrex source code.
    #
    # Subclasses the Python lexer and prepends rules for the C-level
    # constructs: `cdef` / `cpdef` / `ctypedef` declarations, `cimport`
    # statements and `struct` definitions.
    class Cython < Python
      title "Cython"
      desc "Cython and Pyrex source code (cython.org)"
      tag 'cython'
      aliases 'pyx', 'pyrex'
      filenames '*.pyx', '*.pxd', '*.pxi'
      mimetypes 'text/x-cython', 'application/x-cython'

      # @param opts [Hash] lexer options, handed unchanged to the parent
      #   lexer's initializer
      def initialize(opts = {})
        super opts
        # Leading whitespace of the first indented line seen by :c_indent;
        # nil while no indentation has been recorded.
        @indentation = nil
      end

      # The parent lexer's keywords plus the Cython-only ones. Memoized on
      # this class.
      def self.keywords
        @keywords ||= super + %w(
          by except? fused gil nogil
        )
      end

      # Modifier words that may precede the type in a C declaration.
      # Memoized on this class.
      def self.c_keywords
        @c_keywords ||= %w(
          public readonly extern api inline enum union
        )
      end

      identifier = /[a-z_]\w*/i
      dotted_identifier = /[a-z_.][\w.]*/i

      prepend :root do
        # `cdef`, `cpdef` and `ctypedef` open a C declaration. The trailing
        # \b keeps the rule from firing on ordinary identifiers that merely
        # begin with one of these words (e.g. `cdef_count`).
        rule %r/(?:cp?def|ctypedef)\b/ do
          token Keyword
          push :c_definitions
          push :c_start
        end

        # from <module> cimport ...
        rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(cimport)/ do
          groups Keyword::Namespace,
                 Text,
                 Name::Namespace,
                 Text,
                 Keyword::Namespace
        end

        # cimport <module>
        rule %r/(cimport)(\s+)(#{dotted_identifier})/ do
          groups Keyword::Namespace, Text, Name::Namespace
        end

        # struct <name>
        rule %r/(struct)((?:\\\s|\s)+)/ do
          groups Keyword, Text
          push :classname
        end

        mixin :func_call_fix

        # After `(` or `,` try :c_start so that a type in front of a
        # parameter name is highlighted; :c_start pops straight away when
        # nothing type-like follows.
        rule %r/[(,]/, Punctuation, :c_start
      end

      # Consume whitespace (including backslash-escaped whitespace) before
      # the name in :classname and :funcname.
      prepend :classname do
        rule %r/(?:\\\s|\s)+/, Text
      end

      prepend :funcname do
        rule %r/(?:\\\s|\s)+/, Text
      end

      # This is a fix for the way that function calls are lexed in the Python
      # lexer. This should be moved to the Python lexer once confirmed that it
      # does not cause any regressions.
      #
      # An identifier immediately followed by `(` is classified by looking it
      # up in the keyword, exception, builtin and pseudo-builtin lists, and
      # is treated as a function name when it is in none of them.
      state :func_call_fix do
        rule %r/#{identifier}(?=\()/ do |m|
          name = m[0]

          if self.class.keywords.include? name
            token Keyword
          elsif self.class.exceptions.include?(name) || self.class.builtins.include?(name)
            token Name::Builtin
          elsif self.class.builtins_pseudo.include? name
            token Name::Builtin::Pseudo
          else
            token Name::Function
          end
        end
      end

      # The Cython lexer adds three states to those already in the Python lexer.
      # Calls to `cdef`, `cpdef` and `ctypedef` move the lexer into the :c_start
      # state. The primary purpose of this state is to highlight datatypes. Once
      # this has been done, the lexer moves to the :c_definitions state where
      # the majority of text in a definition is lexed. Finally, newlines cause
      # the lexer to move to :c_indent. This state is used to check whether we
      # have moved out of a C block.

      state :c_start do
        rule %r/[^\S\n]+/, Text

        # Trailing \b: only whole words count, so identifiers such as
        # `cdefault` or `signedness` fall through to the rules below.
        rule %r/(?:cp?def|ctypedef)\b/, Keyword

        rule %r/(?:un)?signed\b/, Keyword::Type

        # This rule matches identifiers that could be type declarations. The
        # lookahead matches (1) pointers, (2) arrays and (3) variable names.
        rule %r/#{identifier}(?=(?:\*+)|(?:[ \t]*\[)|(?:[ \t]+\w))/ do |m|
          if self.class.keywords.include? m[0]
            token Keyword
            pop!
          elsif m[0] == 'def'
            token Keyword
            goto :funcname
          elsif %w(struct class).include? m[0]
            token Keyword::Reserved
            goto :classname
          elsif self.class.c_keywords.include? m[0]
            # No pop here: a modifier is followed by more of the declaration,
            # so the lexer stays in :c_start.
            token Keyword::Reserved
          else
            token Keyword::Type
            pop!
          end
        end

        # Nothing type-like at this position: leave the state without
        # consuming any input.
        rule(//) { pop! }
      end

      state :c_definitions do
        # A newline hands over to :c_indent, which inspects the indentation
        # of the following line.
        rule %r/\n/, Text, :c_indent
        mixin :root
      end

      state :c_indent do
        rule %r/[ \t]+/ do |m|
          token Text
          goto :c_start

          if @indentation.nil?
            # First indented line: remember its indentation.
            @indentation = m[0]
          elsif @indentation.length > m[0].length
            # Shorter indentation than the remembered one: the block is over.
            @indentation = nil
            pop! 2 # Pop :c_start and :c_definitions
          end
        end

        # The line does not start with a space or tab: forget the remembered
        # indentation and reset the state stack.
        rule(//) { @indentation = nil; reset_stack }
      end
    end
  end
end
22 changes: 22 additions & 0 deletions spec/lexers/cython_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Checks that the Cython lexer is guessed from its registered filename globs
# and mimetypes.
describe Rouge::Lexers::Cython do
  let(:subject) { Rouge::Lexers::Cython.new }

  describe 'guessing' do
    include Support::Guessing

    it 'guesses by filename' do
      # One file name per supported extension.
      %w(foo.pyx foo.pxd foo.pxi).each do |path|
        assert_guess filename: path
      end
    end

    it 'guesses by mimetype' do
      %w(text/x-cython application/x-cython).each do |type|
        assert_guess mimetype: type
      end
    end
  end
end

119 changes: 119 additions & 0 deletions spec/visual/samples/cython
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# -------------
# Cython Syntax
# -------------

# Import statements
import numpy
from numpy cimport abs

# Import statements with selection
import numpy as np
from cython.view cimport array as cvarray

# Extern values
cdef extern from 'foo.h':
int foo_int
struct foo_struct:
pass

# Type definitions
ctypedef int word

# C struct
cdef struct Foo:
pass

# C class
# (a typed field plus a cpdef method whose defaults are written as `=*`)
cdef class MyType(ParentType):
cdef int field
cpdef foo(self, x=*, int k=*)

# C class with classmethod
cdef class Blah:
def some_method(self):
print self
some_method = classmethod(some_method)
a = 2*3
print "hi", a

# C function
cpdef fib(int n): # Cython version
"""Print a Fibonacci series up to n."""
cdef unsigned int a = 0
cdef int b

b = 1

while a < n:
print(a)
a, b = b, a+b

# C function with return type
cpdef unsigned int foo():
return 1 + 2

# C function with no GIL
# (also contains a nested `cdef:` block among the local declarations)
cpdef int sum3d(int[:, :, :] arr) nogil:
cdef size_t i, j, k, I, J, K
cdef int total = 0
cdef:
int I, J, K
I = arr.shape[0]
J = arr.shape[1]
K = arr.shape[2]
for i in range(I):
for j in range(J):
for k in range(K):
total += arr[i, j, k]
return total

# Inline C function
cdef inline int something_fast(int a, int b):
return a*a + b

# Python function with typed parameters
# NOTE(review): the `def` line below has no trailing colon -- confirm whether
# this is deliberate sample input or a typo.
def foo(int n)
return n + 1

# Assignment on declaration
cdef int n = python_call(foo(x,y), a + b + c) - 32
cdef int [:, :, :] narr_view = narr

# Definition with C-block syntax
cdef:
int carr[3][3][3]
int [:, :, :] carr_view = carr

# Iteration with steps
# (`for ... from ... by` form; `by` is followed by the step value)
for i from 0 <= i < 10 by 2:
print i

# -------------
# Python syntax
# -------------

# Function call
print("NumPy sum of the NumPy array before assignments: %s" % narr.sum())

# Assignment
narr = np.arange(27, dtype=np.dtype("i")).reshape((3, 3, 3))
carr_view[...] = narr_view
cyarr_view[:] = narr_view
narr_view[:, :, :] = 3
carr_view[0, 0, 0] = 100

# Equality test
1 + 1 == 2
1 < 2

# For loops
# NOTE(review): `range(i]` closes a parenthesis with a square bracket --
# presumably deliberate malformed input for the lexer; confirm.
for i in range(i]:
total = total + 1

# Function definition
def fib(n): # Python version
"""Print a Fibonacci series up to n."""
a, b = 0, 1
while a < n:
print a,
a, b = b, a+b

0 comments on commit d4e52dd

Please sign in to comment.