Commit be41f7f4 authored by Jean-Philippe Lang's avatar Jean-Philippe Lang

Upgrade Coderay to 0.9.0.

git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@3014 e93f8b46-1217-0410-a6f0-8f06a7374b81
parent 3b9d8c2a
= CodeRay - Trunk folder structure
== bench - Benchmarking system
All benchmarking stuff goes here.
Test inputs are stored in files named <code>example.<lang></code>.
Test outputs go to <code>bench/test.<encoder-default-file-extension></code>.
Run <code>bench/bench.rb</code> to get a usage description.
Run <code>rake bench</code> to perform an example benchmark.
== bin - Scripts
Executional files for CodeRay.
== demo - Demos and functional tests
Demonstrational scripts to show of CodeRay's features.
Run them as functional tests with <code>rake test:demos</code>.
== etc - Lots of stuff
Some addidtional files for CodeRay, mainly graphics and Vim scripts.
== gem_server - Gem output folder
For <code>rake gem</code>.
== lib - CodeRay library code
This is the base directory for the CodeRay library.
== rake_helpers - Rake helper libraries
Some files to enhance Rake, including the Autumnal Rdoc template and some scripts.
== test - Tests
Tests for the scanners.
Each language has its own subfolder and sub-suite.
Run with <code>rake test</code>.
This diff is collapsed.
#!/usr/bin/env ruby
# CodeRay Executable
#
# Version: 0.1
# Author: murphy
def err msg
$stderr.puts msg
end
begin
require 'coderay'
if ARGV.empty?
puts <<-USAGE
CodeRay #{CodeRay::VERSION} (http://rd.cYcnus.de/coderay)
Usage:
coderay -<lang> [-<format>] < file > output
coderay file [-<format>]
Example:
coderay -ruby -statistic < foo.rb
coderay codegen.c # generates codegen.c.html
USAGE
end
first, second = ARGV
if first
if first[/-(\w+)/] == first
lang = $1.to_sym
input = $stdin.read
tokens = :scan
elsif first == '-'
lang = $1.to_sym
input = $stdin.read
tokens = :scan
else
file = first
tokens = CodeRay.scan_file file
output_filename, output_ext = file, /#{Regexp.escape(File.extname(file))}$/
end
else
puts 'No lang/file given.'
exit 1
end
if second
if second[/-(\w+)/] == second
format = $1.to_sym
else
raise 'Invalid format (must be -xxx).'
end
else
$stderr.puts 'No format given; setting to default (HTML Page)'
format = :page
end
# TODO: allow streaming
if tokens == :scan
output = CodeRay::Duo[lang => format].highlight input #, :stream => true
else
output = tokens.encode format
end
out = $stdout
if output_filename
output_filename += '.' + CodeRay::Encoders[format]::FILE_EXTENSION
if File.exist? output_filename
err 'File %s already exists.' % output_filename
exit
else
out = File.open output_filename, 'w'
end
end
out.print output
rescue => boom
err "Error: #{boom.message}\n"
err boom.backtrace
err '-' * 50
err ARGV
exit 1
end
#!/usr/bin/env ruby
require 'coderay'
puts CodeRay::Encoders[:html]::CSS.new.stylesheet
module CodeRay
module Encoders
# The Tokens encoder converts the tokens to a simple
# readable format. It doesn't use colors and is mainly
# intended for console output.
#
# The tokens are converted with Tokens.write_token.
#
# The format is:
#
# <token-kind> \t <escaped token-text> \n
#
# Example:
#
# require 'coderay'
# puts CodeRay.scan("puts 3 + 4", :ruby).tokens
#
# prints:
#
# ident puts
# space
# integer 3
# space
# operator +
# space
# integer 4
#
class Tokens < Encoder
include Streamable
register_for :tokens
FILE_EXTENSION = 'tok'
protected
def token text, kind
@out << CodeRay::Tokens.write_token(text, kind)
end
end
end
end
module CodeRay
module Scanners
class Java < Scanner
register_for :java
RESERVED_WORDS = %w(abstract assert break case catch class
const continue default do else enum extends final finally for
goto if implements import instanceof interface native new
package private protected public return static strictfp super switch
synchronized this throw throws transient try void volatile while)
PREDEFINED_TYPES = %w(boolean byte char double float int long short)
PREDEFINED_CONSTANTS = %w(true false null)
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
state = :initial
until eos?
kind = nil
match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
if kind == :ident and check(/:(?!:)/)
match << scan(/:/)
kind = :label
end
elsif match = scan(/L?"/)
tokens << [:open, :string]
if match[0] == ?L
tokens << ['L', :modifier]
match = '"'
end
state = :string
kind = :delimiter
elsif scan(%r! \@ .* !x)
kind = :preprocessor
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])/)
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
else
getch
kind = :error
end
when :string
if scan(/[^\\\n"]+/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
tokens << [:close, :string]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if state == :string
tokens << [:close, :string]
end
tokens
end
end
end
end
# http://pastie.textmate.org/50774/
module CodeRay module Scanners
class JavaScript < Scanner
register_for :javascript
RESERVED_WORDS = [
'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
'for', 'goto', 'if', 'return', 'switch', 'while',
# 'struct', 'union', 'enum', 'typedef',
# 'static', 'register', 'auto', 'extern',
# 'sizeof',
'typeof',
# 'volatile', 'const', # C89
# 'inline', 'restrict', # C99
'var', 'function','try','new','in',
'instanceof','throw','catch'
]
PREDEFINED_CONSTANTS = [
'void', 'null', 'this',
'true', 'false','undefined',
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
state = :initial
string_type = nil
regexp_allowed = true
until eos?
kind = :error
match = nil
if state == :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
regexp_allowed = false
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
regexp_allowed = false
elsif regexp_allowed and scan(/\//)
tokens << [:open, :regexp]
state = :regex
kind = :delimiter
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
kind = :operator
regexp_allowed=true
elsif match = scan(/ [$A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
# if kind == :ident and check(/:(?!:)/)
# match << scan(/:/)
# kind = :label
# end
regexp_allowed=false
elsif match = scan(/["']/)
tokens << [:open, :string]
string_type = matched
state = :string
kind = :delimiter
# elsif scan(/#\s*(\w*)/)
# kind = :preprocessor # FIXME multiline preprocs
# state = :include_expected if self[1] == 'include'
#
# elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
# kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
regexp_allowed=false
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
regexp_allowed=false
elsif scan(/(?:\d+)(?![.eEfF])/)
kind = :integer
regexp_allowed=false
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
regexp_allowed=false
else
getch
end
elsif state == :regex
if scan(/[^\\\/]+/)
kind = :content
elsif scan(/\\\/|\\\\/)
kind = :content
elsif scan(/\//)
tokens << [matched, :delimiter]
tokens << [:close, :regexp]
state = :initial
next
else
getch
kind = :content
end
elsif state == :string
if scan(/[^\\"']+/)
kind = :content
elsif scan(/["']/)
if string_type==matched
tokens << [matched, :delimiter]
tokens << [:close, :string]
state = :initial
string_type=nil
next
else
kind = :content
end
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
kind = :error
state = :initial
else
raise "else case \" reached; %p not handled." % peek(1), tokens
end
# elsif state == :include_expected
# if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
# kind = :include
# state = :initial
#
# elsif match = scan(/\s+/)
# kind = :space
# state = :initial if match.index ?\n
#
# else
# getch
#
# end
#
else
raise 'else-case reached', tokens
end
match ||= matched
# raise [match, kind], tokens if kind == :error
tokens << [match, kind]
end
tokens
end
end
end end
\ No newline at end of file
module CodeRay module Scanners
class PHP < Scanner
register_for :php
RESERVED_WORDS = [
'and', 'or', 'xor', '__FILE__', 'exception', '__LINE__', 'array', 'as', 'break', 'case',
'class', 'const', 'continue', 'declare', 'default',
'die', 'do', 'echo', 'else', 'elseif',
'empty', 'enddeclare', 'endfor', 'endforeach', 'endif',
'endswitch', 'endwhile', 'eval', 'exit', 'extends',
'for', 'foreach', 'function', 'global', 'if',
'include', 'include_once', 'isset', 'list', 'new',
'print', 'require', 'require_once', 'return', 'static',
'switch', 'unset', 'use', 'var', 'while',
'__FUNCTION__', '__CLASS__', '__METHOD__', 'final', 'php_user_filter',
'interface', 'implements', 'extends', 'public', 'private',
'protected', 'abstract', 'clone', 'try', 'catch',
'throw', 'cfunction', 'old_function'
]
PREDEFINED_CONSTANTS = [
'null', '$this', 'true', 'false'
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [\$\wrbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
state = :waiting_php
string_type = nil
regexp_allowed = true
until eos?
kind = :error
match = nil
if state == :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(/\?>/)
kind = :char
state = :waiting_php
elsif scan(%r{ (//|\#) [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) }mx)
kind = :comment
regexp_allowed = false
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
regexp_allowed = false
elsif regexp_allowed and scan(/\//)
tokens << [:open, :regexp]
state = :regex
kind = :delimiter
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
kind = :operator
regexp_allowed=true
elsif match = scan(/ [$@A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
regexp_allowed=false
elsif match = scan(/["']/)
tokens << [:open, :string]
string_type = matched
state = :string
kind = :delimiter
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
regexp_allowed=false
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
regexp_allowed=false
elsif scan(/(?:\d+)(?![.eEfF])/)
kind = :integer
regexp_allowed=false
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
regexp_allowed=false
else
getch
end
elsif state == :regex
if scan(/[^\\\/]+/)
kind = :content
elsif scan(/\\\/|\\/)
kind = :content
elsif scan(/\//)
tokens << [matched, :delimiter]
tokens << [:close, :regexp]
state = :initial
next
else
getch
kind = :content
end
elsif state == :string
if scan(/[^\\"']+/)
kind = :content
elsif scan(/["']/)
if string_type==matched
tokens << [matched, :delimiter]
tokens << [:close, :string]
state = :initial
string_type=nil
next
else
kind = :content
end
elsif scan(/ \\ (?: \S ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
kind = :error
state = :initial
else
raise "else case \" reached; %p not handled." % peek(1), tokens
end
elsif state == :waiting_php
if scan(/<\?php/m)
kind = :char
state = :initial
elsif scan(/[^<]+/)
kind = :comment
else
kind = :comment
getch
end
else
raise 'else-case reached', tokens
end
match ||= matched
tokens << [match, kind]
end
tokens
end
end
end end
\ No newline at end of file
= CodeRay
[- Tired of blue'n'gray? Try the original version of this documentation on
http://rd.cYcnus.de/coderay/doc (use Ctrl+Click to open it in its own frame.) -]
coderay.rubychan.de[http://coderay.rubychan.de/doc/] (use Ctrl+Click to open it in its own frame.) -]
== About
CodeRay is a Ruby library for syntax highlighting.
......@@ -18,14 +18,11 @@ And with line numbers.
* is what everybody should have on their website
* solves all your problems and makes the girls run after you
Version: 0.7.4 (2006.october.20)
Version: 0.9.0
Author:: murphy (Kornelius Kalnbach)
Contact:: murphy rubychan de
Website:: coderay.rubychan.de[http://coderay.rubychan.de]
License:: GNU LGPL; see LICENSE file in the main directory.
Subversion:: $Id: README 219 2006-10-20 15:52:25Z murphy $
-----
== Installation
......@@ -33,17 +30,10 @@ You need RubyGems[http://rubyforge.org/frs/?group_id=126].
% gem install coderay
Since CodeRay is still in beta stage, nightly buildy may be useful:
% gem install coderay -rs rd.cYcnus.de/coderay
=== Dependencies
CodeRay needs Ruby 1.8 and the
strscan[http://www.ruby-doc.org/stdlib/libdoc/strscan/rdoc/index.htm]
library (part of the standard library.) It should also run with Ruby 1.9 and
yarv.
CodeRay needs Ruby 1.8.6 or later. It also runs with Ruby 1.9.1+ and JRuby 1.1+.
== Example Usage
......@@ -60,11 +50,9 @@ yarv.
See CodeRay.
Please report errors in this documentation to <coderay cycnus de>.
Please report errors in this documentation to <murphy rubychan de>.
-----
== Credits
=== Special Thanks to
......@@ -72,30 +60,39 @@ Please report errors in this documentation to <coderay cycnus de>.
* licenser (Heinz N. Gies) for ending my QBasic career, inventing the Coder
project and the input/output plugin system.
CodeRay would not exist without him.
* bovi (Daniel Bovensiepen) for helping me out on various occasions.
=== Thanks to
* Caleb Clausen for writing RubyLexer (see
http://rubyforge.org/projects/rubylexer) and lots of very interesting mail
traffic
* birkenfeld (Georg Brandl) and mitsuhiku (Arnim Ronacher) for PyKleur. You
guys rock!
* birkenfeld (Georg Brandl) and mitsuhiku (Arnim Ronacher) for PyKleur, now pygments.
You guys rock!
* Jamis Buck for writing Syntax (see http://rubyforge.org/projects/syntax)
I got some useful ideas from it.
* Doug Kearns and everyone else who worked on ruby.vim - it not only helped me
coding CodeRay, but also gave me a wonderful target to reach for the Ruby
scanner.
* everyone who used CodeBB on http://www.rubyforen.de and
http://www.infhu.de/mx
* iGEL, magichisoka, manveru, WoNDo and everyone I forgot from rubyforen.de
* Daniel and Dethix from ruby-mine.de
* Dookie (who is no longer with us...) and Leonidas from
http://www.python-forum.de
* everyone who uses CodeBB on http://www.rubyforen.de and http://www.python-forum.de
* iGEL, magichisoka, manveru, WoNáDo and everyone I forgot from rubyforen.de
* Dethix from ruby-mine.de
* zickzackw
* Dookie (who is no longer with us...) and Leonidas from http://www.python-forum.de
* Andreas Schwarz for finding out that CaseIgnoringWordList was not case
ignoring! Such things really make you write tests.
* closure for the first version of the Scheme scanner.
* Stefan Walk for the first version of the JavaScript scanner.
* Josh Goebel for another version of the JavaScript scanner and a Diff scanner.
* Jonathan Younger for pointing out the licence confusion caused by wrong LICENSE file.
* Jeremy Hinegardner for finding the shebang-on-empty-file bug in FileType.
* Charles Oliver Nutter and Yehuda Katz for helping me benchmark CodeRay on JRuby.
* Andreas Neuhaus for pointing out a markup bug in coderay/for_redcloth.
* 0xf30fc7 for the FileType patch concerning Delphi file extensions.
* The folks at redmine.org - thank you for using and fixing CodeRay!
* matz and all Ruby gods and gurus
* The inventors of: the computer, the internet, the true color display, HTML &
CSS, VIM, RUBY, pizza, microwaves, guitars, scouting, programming, anime,
CSS, VIM, Ruby, pizza, microwaves, guitars, scouting, programming, anime,
manga, coke and green ice tea.
Where would we be without all those people?
......@@ -103,23 +100,27 @@ Where would we be without all those people?
=== Created using
* Ruby[http://ruby-lang.org/]
* Chihiro (my Sony VAIO laptop), Henrietta (my new MacBook) and
Seras (my Athlon 2200+ tower)
* VIM[http://vim.org] and TextMate[http://macromates.com]
* RDE[http://homepage2.nifty.com/sakazuki/rde_e.html]
* Microsoft Windows (yes, I confess!) and MacOS X
* Firefox[http://www.mozilla.org/products/firefox/] and
* Chihiro (my Sony VAIO laptop); Henrietta (my old MacBook);
Triella, born Rico (my new MacBook); as well as
Seras and Hikari (my PCs)
* RDE[http://homepage2.nifty.com/sakazuki/rde_e.html],
VIM[http://vim.org] and TextMate[http://macromates.com]
* Subversion[http://subversion.tigris.org/]
* Redmine[http://redmine.org/]
* Firefox[http://www.mozilla.org/products/firefox/],
Firebug[http://getfirebug.com/], Safari[http://www.apple.com/safari/], and
Thunderbird[http://www.mozilla.org/products/thunderbird/]
* Rake[http://rake.rubyforge.org/]
* RubyGems[http://docs.rubygems.org/]
* {Subversion/TortoiseSVN}[http://tortoisesvn.tigris.org/] using Apache via
* RubyGems[http://docs.rubygems.org/] and Rake[http://rake.rubyforge.org/]
* TortoiseSVN[http://tortoisesvn.tigris.org/] using Apache via
XAMPP[http://www.apachefriends.org/en/xampp.html]
* RDoc (though I'm quite unsatisfied with it)
* Microsoft Windows (yes, I confess!) and MacOS X
* GNUWin32, MinGW and some other tools to make the shell under windows a bit
more useful
less useless
* Term::ANSIColor[http://term-ansicolor.rubyforge.org/]
* PLEAC[http://pleac.sourceforge.net/] code examples
---
=== Free
* As you can see, CodeRay was created under heavy use of *free* software.
* So CodeRay is also *free*.
......
# = CodeRay Library
#
# $Id: coderay.rb 227 2007-04-24 12:26:18Z murphy $
#
# CodeRay is a Ruby library for syntax highlighting.
#
# I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete,
......@@ -107,7 +105,7 @@
#
# CodeRay.scan_stream:: Scan in stream mode.
#
# == All-in-One Encoding
# == All-in-One Encoding
#
# CodeRay.encode:: Highlight a string with a given input and output format.
#
......@@ -130,13 +128,14 @@
module CodeRay
# Version: Major.Minor.Teeny[.Revision]
# Major: 0 for pre-release
# Minor: odd for beta, even for stable
# Teeny: development state
# Revision: Subversion Revision number (generated on rake)
VERSION = '0.7.6'
# Major: 0 for pre-stable, 1 for stable
# Minor: feature milestone
# Teeny: development state, 0 for pre-release
# Revision: Subversion Revision number (generated on rake gem:make)
VERSION = '0.9.0'
require 'coderay/tokens'
require 'coderay/token_classes'
require 'coderay/scanner'
require 'coderay/encoder'
require 'coderay/duo'
......@@ -315,6 +314,7 @@ end
# Run a test script.
if $0 == __FILE__
$stderr.print 'Press key to print demo.'; gets
# Just use this file as an example of Ruby code.
code = File.read(__FILE__)[/module CodeRay.*/m]
print CodeRay.scan(code, :ruby).html
end
......@@ -2,8 +2,6 @@ module CodeRay
# = Duo
#
# $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
#
# A Duo is a convenient way to use CodeRay. You just create a Duo,
# giving it a lang (language of the input code) and a format (desired
# output format), and call Duo#highlight with the code.
......
require "stringio"
module CodeRay
# This module holds the Encoder class and its subclasses.
......@@ -132,30 +130,56 @@ module CodeRay
# By default, it calls text_token or block_token, depending on
# whether +text+ is a String.
def token text, kind
out =
if text.is_a? ::String # Ruby 1.9: :open.is_a? String
encoded_token =
if text.is_a? ::String
text_token text, kind
elsif text.is_a? ::Symbol
block_token text, kind
else
raise 'Unknown token text type: %p' % text
end
@out << out if @out
append_encoded_token_to_output encoded_token
end
def append_encoded_token_to_output encoded_token
@out << encoded_token if encoded_token && defined?(@out) && @out
end
# Called for each text token ([text, kind]), where text is a String.
def text_token text, kind
end
# Called for each block (non-text) token ([action, kind]), where action is a Symbol.
def block_token action, kind
case action
when :open
open_token kind
when :close
close_token kind
when :begin_line
begin_line kind
when :end_line
end_line kind
else
raise 'unknown block action: %p' % action
end
end
# Called for each block token at the start of the block ([:open, kind]).
def open_token kind
end
# Called for each block token end of the block ([:close, kind]).
def close_token kind
end
# Called for each line token block at the start of the line ([:begin_line, kind]).
def begin_line kind
end
# Called for each line token block at the end of the line ([:end_line, kind]).
def end_line kind
end
# Called with merged options after encoding starts.
# The return value is the result of encoding, typically @out.
......@@ -167,8 +191,16 @@ module CodeRay
#
# The already created +tokens+ object must be used; it can be a
# TokenStream or a Tokens object.
def compile tokens, options
tokens.each(&self)
if RUBY_VERSION >= '1.9'
def compile tokens, options
for text, kind in tokens
token text, kind
end
end
else
def compile tokens, options
tokens.each(&self)
end
end
end
......
module CodeRay
module Encoders
load :token_class_filter
class CommentFilter < TokenClassFilter
register_for :comment_filter
DEFAULT_OPTIONS = TokenClassFilter::DEFAULT_OPTIONS.merge \
:exclude => [:comment]
end
end
end
......@@ -35,6 +35,14 @@ module Encoders
">"
end
def begin_line kind
"#{kind}["
end
def end_line kind
"]"
end
end
end
......
......@@ -9,10 +9,9 @@ module Encoders
register_for :div
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :style,
:wrap => :div,
})
:wrap => :div
end
......
module CodeRay
module Encoders
class Filter < Encoder
register_for :filter
protected
def setup options
@out = Tokens.new
end
end
end
end
......@@ -25,10 +25,6 @@ module Encoders
#
# == Options
#
# === :escape
# Escape html entities
# Default: true
#
# === :tab_width
# Convert \t characters to +n+ spaces (a number.)
# Default: 8
......@@ -45,6 +41,12 @@ module Encoders
#
# Default: nil
#
# === :title
#
# The title of the HTML page (works only when :wrap is set to :page.)
#
# Default: 'CodeRay output'
#
# === :line_numbers
# Include line numbers in :table, :inline, :list or nil (no line numbers)
#
......@@ -60,6 +62,16 @@ module Encoders
#
# Default: 10
#
# === :highlight_lines
#
# Highlights certain line numbers now by using the :highlight_lines option.
# Can be any Enumerable, typically just an Array or Range, of numbers.
#
# Bolding is deactivated when :highlight_lines is set. It only makes sense
# in combination with :line_numbers.
#
# Default: nil
#
# === :hint
# Include some information into the output using the title attribute.
# Can be :info (show token type on mouse-over), :info_long (with full path)
......@@ -74,19 +86,19 @@ module Encoders
FILE_EXTENSION = 'html'
DEFAULT_OPTIONS = {
:escape => true,
:tab_width => 8,
:level => :xhtml,
# :level => :xhtml, # reserved for future use
:css => :class,
:style => :cycnus,
:wrap => nil,
:title => 'CodeRay output',
:line_numbers => nil,
:line_number_start => 1,
:bold_every => 10,
:highlight_lines => nil,
:hint => false,
}
......@@ -141,7 +153,7 @@ module Encoders
when :debug
classes.inspect
end
" title=\"#{title}\""
title ? " title=\"#{title}\"" : ''
end
def setup options
......@@ -150,7 +162,6 @@ module Encoders
@HTML_ESCAPE = HTML_ESCAPE.dup
@HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
@escape = options[:escape]
@opened = [nil]
@css = CSS.new options[:style]
......@@ -164,7 +175,7 @@ module Encoders
when :class
@css_style = Hash.new do |h, k|
c = Tokens::ClassOfKind[k.first]
c = CodeRay::Tokens::ClassOfKind[k.first]
if c == :NO_HIGHLIGHT and not hint
h[k.dup] = false
else
......@@ -222,43 +233,70 @@ module Encoders
@out.css = @css
@out.numerize! options[:line_numbers], options
@out.wrap! options[:wrap]
@out.apply_title! options[:title]
super
end
def token text, type
if text.is_a? ::String
if @escape && (text =~ /#{HTML_ESCAPE_PATTERN}/o)
def token text, type = :plain
case text
when nil
# raise 'Token with nil as text was given: %p' % [[text, type]]
when String
if text =~ /#{HTML_ESCAPE_PATTERN}/o
text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
end
@opened[0] = type
if style = @css_style[@opened]
if text != "\n" && style = @css_style[@opened]
@out << style << text << '</span>'
else
@out << text
end
else
case text
when :open
@opened[0] = type
@out << (@css_style[@opened] || '<span>')
@opened << type
when :close
if @opened.empty?
# nothing to close
else
if $DEBUG and (@opened.size == 1 or @opened.last != type)
raise 'Malformed token stream: Trying to close a token (%p) \
that is not open. Open are: %p.' % [type, @opened[1..-1]]
end
@out << '</span>'
@opened.pop
# token groups, eg. strings
when :open
@opened[0] = type
@out << (@css_style[@opened] || '<span>')
@opened << type
when :close
if @opened.empty?
# nothing to close
else
if $DEBUG and (@opened.size == 1 or @opened.last != type)
raise 'Malformed token stream: Trying to close a token (%p) \
that is not open. Open are: %p.' % [type, @opened[1..-1]]
end
when nil
raise 'Token with nil as text was given: %p' % [[text, type]]
@out << '</span>'
@opened.pop
end
# whole lines to be highlighted, eg. a deleted line in a diff
when :begin_line
@opened[0] = type
if style = @css_style[@opened]
@out << style.sub('<span', '<div')
else
raise 'unknown token kind: %p' % text
@out << '<div>'
end
@opened << type
when :end_line
if @opened.empty?
# nothing to close
else
if $DEBUG and (@opened.size == 1 or @opened.last != type)
raise 'Malformed token stream: Trying to close a line (%p) \
that is not open. Open are: %p.' % [type, @opened[1..-1]]
end
@out << '</div>'
@opened.pop
end
else
raise 'unknown token kind: %p' % [text]
end
end
......
......@@ -27,16 +27,19 @@ module Encoders
1.upto(styles.size) do |offset|
break if style = cl[styles[offset .. -1]]
end
raise 'Style not found: %p' % [styles] if $DEBUG and style.empty?
$stderr.puts 'Style not found: %p' % [styles] if $DEBUG and style.empty?
return style
end
private
CSS_CLASS_PATTERN = /
( (?: # $1 = classes
\s* \. [-\w]+
)+ )
( # $1 = selectors
(?:
(?: \s* \. [-\w]+ )+
\s* ,?
)+
)
\s* \{ \s*
( [^\}]+ )? # $2 = style
\s* \} \s*
......@@ -44,12 +47,14 @@ module Encoders
( . ) # $3 = error
/mx
def parse stylesheet
stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error|
stylesheet.scan CSS_CLASS_PATTERN do |selectors, style, error|
raise "CSS parse error: '#{error.inspect}' not recognized" if error
styles = classes.scan(/[-\w]+/)
cl = styles.pop
@classes[cl] ||= Hash.new
@classes[cl][styles] = style.to_s.strip
for selector in selectors.split(',')
classes = selector.scan(/[-\w]+/)
cl = classes.pop
@classes[cl] ||= Hash.new
@classes[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
end
end
end
......
......@@ -32,9 +32,19 @@ module Encoders
#end
bold_every = options[:bold_every]
highlight_lines = options[:highlight_lines]
bolding =
if bold_every == false
if bold_every == false && highlight_lines == nil
proc { |line| line.to_s }
elsif highlight_lines.is_a? Enumerable
highlight_lines = highlight_lines.to_set
proc do |line|
if highlight_lines.include? line
"<strong class=\"highlighted\">#{line}</strong>" # highlighted line numbers in bold
else
line.to_s
end
end
elsif bold_every.is_a? Integer
raise ArgumentError, ":bolding can't be 0." if bold_every == 0
proc do |line|
......@@ -51,12 +61,12 @@ module Encoders
case mode
when :inline
max_width = (start + line_count).to_s.size
line = start
line_number = start
gsub!(/^/) do
line_number = bolding.call line
indent = ' ' * (max_width - line.to_s.size)
res = "<span class=\"no\">#{indent}#{line_number}</span> "
line += 1
line_number_text = bolding.call line_number
indent = ' ' * (max_width - line_number.to_s.size) # TODO: Optimize (10^x)
res = "<span class=\"no\">#{indent}#{line_number_text}</span> "
line_number += 1
res
end
......@@ -65,12 +75,12 @@ module Encoders
# Because even monospace fonts seem to have different heights when bold,
# I make the newline bold, both in the code and the line numbers.
# FIXME Still not working perfect for Mr. Internet Exploder
# FIXME Firefox struggles with very long codes (> 200 lines)
line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
gsub!(/<\/div>\n/) { '</div>' }
gsub!(/\n/) { "<tt>\n</tt>" }
wrap_in! line_numbers_table_tpl
@wrapped_in = :div
......@@ -90,8 +100,9 @@ module Encoders
end
close = '</span>' * opened_tags.size
"<li>#{open}#{line}#{close}</li>"
"<li>#{open}#{line}#{close}</li>\n"
end
chomp!("\n")
wrap_in! LIST
@wrapped_in = :div
......
......@@ -86,6 +86,11 @@ module Encoders
Template.wrap! self, template, 'CONTENT'
self
end
def apply_title! title
self.sub!(/(<title>)(<\/title>)/) { $1 + title + $2 }
self
end
def wrap! element, *args
return self if not element or element == wrapped_in
......@@ -100,6 +105,10 @@ module Encoders
wrap! :div if wrapped_in? nil
raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
wrap_in! Output.page_template_for_css(@css)
if args.first.is_a?(Hash) && title = args.first[:title]
apply_title! title
end
self
when nil
return self
else
......@@ -166,7 +175,9 @@ module Encoders
# title="double click to expand"
LIST = <<-`LIST`
<ol class="CodeRay"><%CONTENT%></ol>
<ol class="CodeRay">
<%CONTENT%>
</ol>
LIST
PAGE = <<-`PAGE`
......@@ -175,7 +186,7 @@ module Encoders
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>CodeRay HTML Encoder Example</title>
<title></title>
<style type="text/css">
<%CSS%>
</style>
......
module CodeRay
module Encoders
# = JSON Encoder
class JSON < Encoder
register_for :json
FILE_EXTENSION = 'json'
protected
def compile tokens, options
require 'json'
@out = tokens.to_a.to_json
end
end
end
end
module CodeRay
module Encoders
# Counts the LoC (Lines of Code). Returns an Integer >= 0.
#
# Everything that is not comment, markup, doctype/shebang, or an empty line,
# is considered to be code.
#
# For example,
# * HTML files not containing JavaScript have 0 LoC
# * in a Java class without comments, LoC is the number of non-empty lines
#
# A Scanner class should define the token kinds that are not code in the
# KINDS_NOT_LOC constant.
class LinesOfCode < Encoder
register_for :lines_of_code
NON_EMPTY_LINE = /^\s*\S.*$/
def compile tokens, options
kinds_not_loc = tokens.scanner.class::KINDS_NOT_LOC
code = tokens.token_class_filter :exclude => kinds_not_loc
@loc = code.text.scan(NON_EMPTY_LINE).size
end
def finish options
@loc
end
end
end
end
......@@ -9,11 +9,10 @@ module Encoders
register_for :page
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :class,
:wrap => :page,
:line_numbers => :table
})
end
......
......@@ -9,10 +9,9 @@ module Encoders
register_for :span
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :style,
:wrap => :span,
})
:wrap => :span
end
......
......@@ -14,16 +14,16 @@ module Encoders
protected
def setup options
@out = ''
super
@sep = options[:separator]
end
def token text, kind
@out << text + @sep if text.is_a? ::String
def text_token text, kind
text + @sep
end
def finish options
@out.chomp @sep
super.chomp @sep
end
end
......
module CodeRay
module Encoders
load :filter
class TokenClassFilter < Filter
include Streamable
register_for :token_class_filter
DEFAULT_OPTIONS = {
:exclude => [],
:include => :all
}
protected
def setup options
super
@exclude = options[:exclude]
@include = options[:include]
end
def text_token text, kind
[text, kind] if \
(@include == :all || @include.include?(kind)) &&
!(@exclude == :all || @exclude.include?(kind))
end
end
end
end
......@@ -29,6 +29,7 @@ module Encoders
end
def finish options
@out = ''
@doc.write @out, options[:pretty], options[:transitive], true
@out
end
......
module CodeRay
# A little hack to enable CodeRay highlighting in RedCloth.
#
# Usage:
# require 'coderay'
# require 'coderay/for_redcloth'
# RedCloth.new('@[ruby]puts "Hello, World!"@').to_html
#
# Make sure you have RedCloth 4.0.3 activated, for example by calling
# require 'rubygems'
# before RedCloth is loaded and before calling CodeRay.for_redcloth.
module ForRedCloth
def self.install
gem 'RedCloth', '>= 4.0.3' rescue nil
require 'redcloth'
unless RedCloth::VERSION.to_s >= '4.0.3'
raise 'CodeRay.for_redcloth needs RedCloth version 4.0.3 or later.'
end
RedCloth::TextileDoc.send :include, ForRedCloth::TextileDoc
RedCloth::Formatters::HTML.module_eval do
def unescape(html)
replacements = {
'&amp;' => '&',
'&quot;' => '"',
'&gt;' => '>',
'&lt;' => '<',
}
html.gsub(/&(?:amp|quot|[gl]t);/) { |entity| replacements[entity] }
end
undef code, bc_open, bc_close, escape_pre
def code(opts) # :nodoc:
opts[:block] = true
if !opts[:lang] && RedCloth::VERSION.to_s >= '4.2.0'
# simulating pre-4.2 behavior
if opts[:text].sub!(/\A\[(\w+)\]/, '')
if CodeRay::Scanners[$1].plugin_id == 'plaintext'
opts[:text] = $& + opts[:text]
else
opts[:lang] = $1
end
end
end
if opts[:lang] && !filter_coderay
require 'coderay'
@in_bc ||= nil
format = @in_bc ? :div : :span
opts[:text] = unescape(opts[:text]) unless @in_bc
highlighted_code = CodeRay.encode opts[:text], opts[:lang], format, :stream => true
highlighted_code.sub!(/\A<(span|div)/) { |m| m + pba(@in_bc || opts) }
highlighted_code
else
"<code#{pba(opts)}>#{opts[:text]}</code>"
end
end
def bc_open(opts) # :nodoc:
opts[:block] = true
@in_bc = opts
opts[:lang] ? '' : "<pre#{pba(opts)}>"
end
def bc_close(opts) # :nodoc:
opts = @in_bc
@in_bc = nil
opts[:lang] ? '' : "</pre>\n"
end
def escape_pre(text)
if @in_bc ||= nil
text
else
html_esc(text, :html_escape_preformatted)
end
end
end
end
module TextileDoc # :nodoc:
attr_accessor :filter_coderay
end
end
end
CodeRay::ForRedCloth.install
\ No newline at end of file
#!/usr/bin/env ruby
module CodeRay
# = FileType
......@@ -33,12 +34,12 @@ module FileType
# That means you can get filetypes from files that don't exist.
def [] filename, read_shebang = false
name = File.basename filename
ext = File.extname name
ext.sub!(/^\./, '') # delete the leading dot
ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot
ext2 = filename.to_s[/\.(.*)/, 1] # from first dot
type =
TypeFromExt[ext] ||
TypeFromExt[ext.downcase] ||
(TypeFromExt[ext2.downcase] if ext2) ||
TypeFromName[name] ||
TypeFromName[name.downcase]
type ||= shebang(filename) if read_shebang
......@@ -49,8 +50,11 @@ module FileType
def shebang filename
begin
File.open filename, 'r' do |f|
first_line = f.gets
first_line[TypeFromShebang]
if first_line = f.gets
if type = first_line[TypeFromShebang]
type.to_sym
end
end
end
rescue IOError
nil
......@@ -77,27 +81,41 @@ module FileType
end
TypeFromExt = {
'rb' => :ruby,
'rbw' => :ruby,
'rake' => :ruby,
'mab' => :ruby,
'cpp' => :c,
'c' => :c,
'cpp' => :cpp,
'css' => :css,
'diff' => :diff,
'dpr' => :delphi,
'groovy' => :groovy,
'gvy' => :groovy,
'h' => :c,
'java' => :java,
'js' => :javascript,
'xml' => :xml,
'htm' => :html,
'html' => :html,
'html.erb' => :rhtml,
'java' => :java,
'js' => :java_script,
'json' => :json,
'mab' => :ruby,
'pas' => :delphi,
'patch' => :diff,
'php' => :php,
'php3' => :php,
'php4' => :php,
'php5' => :php,
'xhtml' => :xhtml,
'py' => :python,
'py3' => :python,
'pyw' => :python,
'rake' => :ruby,
'raydebug' => :debug,
'rb' => :ruby,
'rbw' => :ruby,
'rhtml' => :rhtml,
'ss' => :scheme,
'rxml' => :ruby,
'sch' => :scheme,
'sql' => :sql,
'ss' => :scheme,
'xhtml' => :xhtml,
'xml' => :xml,
'yaml' => :yaml,
'yml' => :yaml,
}
......@@ -115,15 +133,16 @@ end
if $0 == __FILE__
$VERBOSE = true
eval DATA.read, nil, $0, __LINE__+4
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
require 'test/unit'
class TC_FileType < Test::Unit::TestCase
class FileTypeTests < Test::Unit::TestCase
include CodeRay
def test_fetch
assert_raise FileType::UnknownFileType do
FileType.fetch ''
......@@ -150,6 +169,8 @@ class TC_FileType < Test::Unit::TestCase
def test_ruby
assert_equal :ruby, FileType['test.rb']
assert_equal :ruby, FileType['test.java.rb']
assert_equal :java, FileType['test.rb.java']
assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw']
assert_equal :ruby, FileType['/usr/bin/something/Rakefile']
assert_equal :ruby, FileType['~/myapp/gem/Rantfile']
......@@ -174,6 +195,7 @@ class TC_FileType < Test::Unit::TestCase
assert_equal :xhtml, FileType['test.xhtml']
assert_equal :xhtml, FileType['test.html.xhtml']
assert_equal :rhtml, FileType['_form.rhtml']
assert_equal :rhtml, FileType['_form.html.erb']
end
def test_yaml
......@@ -183,7 +205,16 @@ class TC_FileType < Test::Unit::TestCase
assert_not_equal :yaml, FileType['YAML']
end
def test_shebang
def test_pathname
require 'pathname'
pn = Pathname.new 'test.rb'
assert_equal :ruby, FileType[pn]
dir = Pathname.new '/etc/var/blubb'
assert_equal :ruby, FileType[dir + pn]
assert_equal :cpp, FileType[dir + 'test.cpp']
end
def test_no_shebang
dir = './test'
if File.directory? dir
Dir.chdir dir do
......@@ -191,5 +222,19 @@ class TC_FileType < Test::Unit::TestCase
end
end
end
def test_shebang_empty_file
require 'tmpdir'
tmpfile = File.join(Dir.tmpdir, 'bla')
File.open(tmpfile, 'w') { } # touch
assert_equal nil, FileType[tmpfile]
end
def test_shebang
require 'tmpdir'
tmpfile = File.join(Dir.tmpdir, 'bla')
File.open(tmpfile, 'w') { |f| f.puts '#!/usr/bin/env ruby' }
assert_equal :ruby, FileType[tmpfile, true]
end
end
......@@ -2,7 +2,7 @@
#
# A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
#
# Author: murphy (mail to murphy cYcnus de)
# Author: murphy (mail to murphy rubychan de)
#
# Version: 0.2 (2005.may.28)
#
......
......@@ -2,8 +2,6 @@ module CodeRay
# = PluginHost
#
# $Id: plugin.rb 220 2007-01-01 02:58:58Z murphy $
#
# A simple subclass plugin system.
#
# Example:
......@@ -22,7 +20,7 @@ module CodeRay
#
# Generators[:fancy] #-> FancyGenerator
# # or
# require_plugin 'Generators/fancy'
# CodeRay.require_plugin 'Generators/fancy'
module PluginHost
# Raised if Encoders::[] fails because:
......@@ -135,9 +133,13 @@ module PluginHost
# map :navy => :dark_blue
# default :gray
# end
def default id
id = validate_id id
plugin_hash[nil] = id
def default id = nil
if id
id = validate_id id
plugin_hash[nil] = id
else
plugin_hash[nil]
end
end
# Every plugin must register itself for one or more
......@@ -174,7 +176,7 @@ module PluginHost
def inspect
map = plugin_hash.dup
map.each do |id, plugin|
map[id] = plugin.to_s[/(?>[\w_]+)$/]
map[id] = plugin.to_s[/(?>\w+)$/]
end
"#{name}[#{host_id}]#{map.inspect}"
end
......@@ -241,7 +243,7 @@ protected
id
elsif id.is_a? String
if id[/\w+/] == id
id.to_sym
id.downcase.to_sym
else
raise ArgumentError, "Invalid id: '#{id}' given."
end
......@@ -279,6 +281,14 @@ module Plugin
plugin_host.register self, *ids
end
def title title = nil
if title
@title = title.to_s
else
@title ||= name[/([^:]+)$/, 1]
end
end
# The host for this Plugin class.
def plugin_host host = nil
if host and not host.is_a? PluginHost
......@@ -299,15 +309,23 @@ module Plugin
#
# The above example loads the file myplugin/my_helper.rb relative to the
# file in which MyPlugin was defined.
#
# You can also load a helper from a different plugin:
#
# helper 'other_plugin/other_helper'
def helper *helpers
for helper in helpers
self::PLUGIN_HOST.require_helper plugin_id, helper.to_s
if helper.is_a?(String) && helper[/\//]
self::PLUGIN_HOST.require_helper $`, $'
else
self::PLUGIN_HOST.require_helper plugin_id, helper.to_s
end
end
end
# Returns the pulgin id used by the engine.
def plugin_id
name[/[\w_]+$/].downcase
name[/\w+$/].downcase
end
end
......@@ -318,7 +336,7 @@ end
# CodeRay.require_plugin '<Host ID>/<Plugin ID>'
#
# Returns the loaded plugin.
def require_plugin path
def self.require_plugin path
host_id, plugin_id = path.split '/', 2
host = PluginHost.host_by_id(host_id)
raise PluginHost::HostNotFound,
......
......@@ -104,6 +104,7 @@ class CaseIgnoringWordList < WordList
h[k] = h.fetch k.downcase, default
end
else
super(default, false)
def self.[] key # :nodoc:
super(key.downcase)
end
......
......@@ -4,8 +4,6 @@ module CodeRay
# = Scanners
#
# $Id: scanner.rb 222 2007-01-01 16:26:17Z murphy $
#
# This module holds the Scanner class and its subclasses.
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby
# can be found in coderay/scanners/ruby.
......@@ -45,6 +43,7 @@ module CodeRay
# You can also use +map+, +any?+, +find+ and even +sort_by+,
# if you want.
class Scanner < StringScanner
extend Plugin
plugin_host Scanners
......@@ -57,6 +56,8 @@ module CodeRay
#
# Define @default_options for subclasses.
DEFAULT_OPTIONS = { :stream => false }
KINDS_NOT_LOC = [:comment, :doctype]
class << self
......@@ -66,7 +67,16 @@ module CodeRay
end
def normify code
code = code.to_s.to_unix
code = code.to_s
if code.respond_to? :force_encoding
begin
code.force_encoding 'utf-8'
code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
rescue ArgumentError
code.force_encoding 'binary'
end
end
code.to_unix
end
def file_extension extension = nil
......@@ -75,7 +85,7 @@ module CodeRay
else
@file_extension ||= plugin_id.to_s
end
end
end
end
......@@ -121,6 +131,7 @@ module CodeRay
"but :stream is #{@options[:stream]}" if block_given?
@tokens ||= Tokens.new
end
@tokens.scanner = self
setup
end
......@@ -178,6 +189,16 @@ module CodeRay
def line
string[0..pos].count("\n") + 1
end
def column pos = self.pos
return 0 if pos <= 0
string = string()
if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
@bin_string ||= string.dup.force_encoding(:binary)
string = @bin_string
end
pos - (string.rindex(?\n, pos) || 0)
end
protected
......@@ -202,6 +223,7 @@ module CodeRay
def reset_instance
@tokens.clear unless @options[:keep_tokens]
@cached_tokens = nil
@bin_string = nil if defined? @bin_string
end
# Scanner error with additional status information
......@@ -214,7 +236,7 @@ module CodeRay
tokens:
%s
current line: %d pos = %d
current line: %d column: %d pos: %d
matched: %p state: %p
bol? = %p, eos? = %p
......@@ -229,10 +251,10 @@ surrounding code:
msg,
tokens.size,
tokens.last(10).map { |t| t.inspect }.join("\n"),
line, pos,
line, column, pos,
matched, state, bol?, eos?,
string[pos-ambit,ambit],
string[pos,ambit],
string[pos - ambit, ambit],
string[pos, ambit],
]
end
......
module CodeRay
module Scanners
map :cpp => :c,
:plain => :plaintext,
:pascal => :delphi,
map \
:h => :c,
:cplusplus => :cpp,
:'c++' => :cpp,
:ecma => :java_script,
:ecmascript => :java_script,
:ecma_script => :java_script,
:irb => :ruby,
:xml => :html,
:xhtml => :nitro_xhtml,
:nitro => :nitro_xhtml
:javascript => :java_script,
:js => :java_script,
:nitro => :nitro_xhtml,
:pascal => :delphi,
:plain => :plaintext,
:xhtml => :html,
:yml => :yaml
default :plain
......
......@@ -3,22 +3,20 @@ module Scanners
class C < Scanner
register_for :c
include Streamable
register_for :c
file_extension 'c'
RESERVED_WORDS = [
'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
'for', 'goto', 'if', 'return', 'switch', 'while',
'struct', 'union', 'enum', 'typedef',
'static', 'register', 'auto', 'extern',
'sizeof',
'volatile', 'const', # C89
'inline', 'restrict', # C99
'asm', 'break', 'case', 'continue', 'default', 'do',
'else', 'enum', 'for', 'goto', 'if', 'return',
'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
'restrict', # C99
]
PREDEFINED_TYPES = [
'int', 'long', 'short', 'char', 'void',
'int', 'long', 'short', 'char',
'signed', 'unsigned', 'float', 'double',
'bool', 'complex', # C99
]
......@@ -27,13 +25,19 @@ module Scanners
'EOF', 'NULL',
'true', 'false', # C99
]
DIRECTIVES = [
'auto', 'extern', 'register', 'static', 'void',
'const', 'volatile', # C89
'inline', # C99
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(DIRECTIVES, :directive).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
......@@ -59,16 +63,19 @@ module Scanners
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
if kind == :ident and check(/:(?!:)/)
match << scan(/:/)
# FIXME: don't match a?b:c
kind = :label
end
elsif scan(/\$/)
kind = :ident
elsif match = scan(/L?"/)
tokens << [:open, :string]
if match[0] == ?L
......@@ -91,7 +98,7 @@ module Scanners
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])/)
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
......@@ -122,7 +129,7 @@ module Scanners
end
when :include_expected
if scan(/[^\n]+/)
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
kind = :include
state = :initial
......@@ -131,8 +138,8 @@ module Scanners
state = :initial if match.index ?\n
else
getch
kind = :error
state = :initial
next
end
......
module CodeRay
module Scanners
class CPlusPlus < Scanner
include Streamable
register_for :cpp
file_extension 'cpp'
title 'C++'
# http://www.cppreference.com/wiki/keywords/start
RESERVED_WORDS = [
'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
'case', 'catch', 'class', 'compl', 'const_cast',
'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
'sizeof', 'static_cast', 'struct', 'switch', 'template',
'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
'while', 'xor', 'xor_eq'
]
PREDEFINED_TYPES = [
'bool', 'char', 'double', 'float', 'int', 'long',
'short', 'signed', 'unsigned', 'wchar_t', 'string'
]
PREDEFINED_CONSTANTS = [
'false', 'true',
'EOF', 'NULL',
]
PREDEFINED_VARIABLES = [
'this'
]
DIRECTIVES = [
'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
'volatile'
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(PREDEFINED_VARIABLES, :local_variable).
add(DIRECTIVES, :directive).
add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options
state = :initial
until eos?
kind = nil
match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
if kind == :ident and check(/:(?!:)/)
# FIXME: don't match a?b:c
kind = :label
elsif match == 'class'
state = :class_name_expected
end
elsif scan(/\$/)
kind = :ident
elsif match = scan(/L?"/)
tokens << [:open, :string]
if match[0] == ?L
tokens << ['L', :modifier]
match = '"'
end
state = :string
kind = :delimiter
elsif scan(/#\s*(\w*)/)
kind = :preprocessor
state = :include_expected if self[1] == 'include'
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
else
getch
kind = :error
end
when :string
if scan(/[^\\"]+/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
tokens << [:close, :string]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
when :include_expected
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
kind = :include
state = :initial
elsif match = scan(/\s+/)
kind = :space
state = :initial if match.index ?\n
else
state = :initial
next
end
when :class_name_expected
if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = :class
state = :initial
elsif match = scan(/\s+/)
kind = :space
else
getch
kind = :error
state = :initial
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if state == :string
tokens << [:close, :string]
end
tokens
end
end
end
end
module CodeRay
module Scanners
class CSS < Scanner
register_for :css
KINDS_NOT_LOC = [
:comment,
:class, :pseudo_class, :type,
:constant, :directive,
:key, :value, :operator, :color, :float,
:error, :important,
]
module RE
NonASCII = /[\x80-\xFF]/
Hex = /[0-9a-fA-F]/
Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
NMChar = /[-_a-zA-Z0-9]|#{NonASCII}|#{Escape}/
NMStart = /[_a-zA-Z]|#{NonASCII}|#{Escape}/
NL = /\r\n|\r|\n|\f/
String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/ # FIXME: buggy regexp
String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/ # FIXME: buggy regexp
String = /#{String1}|#{String2}/
HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
Color = /#{HexColor}/
Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/
Name = /#{NMChar}+/
Ident = /-?#{NMStart}#{NMChar}*/
AtKeyword = /@#{Ident}/
Percentage = /#{Num}%/
reldimensions = %w[em ex px]
absdimensions = %w[in cm mm pt pc]
Unit = Regexp.union(*(reldimensions + absdimensions))
Dimension = /#{Num}#{Unit}/
Comment = %r! /\* (?: .*? \*/ | .* ) !mx
Function = /(?:url|alpha)\((?:[^)\n\r\f]|\\\))*\)?/
Id = /##{Name}/
Class = /\.#{Name}/
PseudoClass = /:#{Name}/
AttributeSelector = /\[[^\]]*\]?/
end
def scan_tokens tokens, options
value_expected = nil
states = [:initial]
until eos?
kind = nil
match = nil
if scan(/\s+/)
kind = :space
elsif case states.last
when :initial, :media
if scan(/(?>#{RE::Ident})(?!\()|\*/ox)
kind = :type
elsif scan RE::Class
kind = :class
elsif scan RE::Id
kind = :constant
elsif scan RE::PseudoClass
kind = :pseudo_class
elsif match = scan(RE::AttributeSelector)
# TODO: Improve highlighting inside of attribute selectors.
tokens << [:open, :string]
tokens << [match[0,1], :delimiter]
tokens << [match[1..-2], :content] if match.size > 2
tokens << [match[-1,1], :delimiter] if match[-1] == ?]
tokens << [:close, :string]
next
elsif match = scan(/@media/)
kind = :directive
states.push :media_before_name
end
when :block
if scan(/(?>#{RE::Ident})(?!\()/ox)
if value_expected
kind = :value
else
kind = :key
end
end
when :media_before_name
if scan RE::Ident
kind = :type
states[-1] = :media_after_name
end
when :media_after_name
if scan(/\{/)
kind = :operator
states[-1] = :media
end
when :comment
if scan(/(?:[^*\s]|\*(?!\/))+/)
kind = :comment
elsif scan(/\*\//)
kind = :comment
states.pop
elsif scan(/\s+/)
kind = :space
end
else
raise_inspect 'Unknown state', tokens
end
elsif scan(/\/\*/)
kind = :comment
states.push :comment
elsif scan(/\{/)
value_expected = false
kind = :operator
states.push :block
elsif scan(/\}/)
value_expected = false
if states.last == :block || states.last == :media
kind = :operator
states.pop
else
kind = :error
end
elsif match = scan(/#{RE::String}/o)
tokens << [:open, :string]
tokens << [match[0, 1], :delimiter]
tokens << [match[1..-2], :content] if match.size > 2
tokens << [match[-1, 1], :delimiter] if match.size >= 2
tokens << [:close, :string]
next
elsif match = scan(/#{RE::Function}/o)
tokens << [:open, :string]
start = match[/^\w+\(/]
tokens << [start, :delimiter]
if match[-1] == ?)
tokens << [match[start.size..-2], :content]
tokens << [')', :delimiter]
else
tokens << [match[start.size..-1], :content]
end
tokens << [:close, :string]
next
elsif scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
kind = :float
elsif scan(/#{RE::Color}/o)
kind = :color
elsif scan(/! *important/)
kind = :important
elsif scan(/rgb\([^()\n]*\)?/)
kind = :color
elsif scan(/#{RE::AtKeyword}/o)
kind = :directive
elsif match = scan(/ [+>:;,.=()\/] /x)
if match == ':'
value_expected = true
elsif match == ';'
value_expected = false
end
kind = :operator
else
getch
kind = :error
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
tokens
end
end
end
end
......@@ -6,6 +6,8 @@ module Scanners
include Streamable
register_for :debug
file_extension 'raydebug'
title 'CodeRay Token Dump'
protected
def scan_tokens tokens, options
......
......@@ -4,6 +4,7 @@ module Scanners
class Delphi < Scanner
register_for :delphi
file_extension 'pas'
RESERVED_WORDS = [
'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
......
module CodeRay
module Scanners
class Diff < Scanner
register_for :diff
title 'diff output'
def scan_tokens tokens, options
line_kind = nil
state = :initial
until eos?
kind = match = nil
if match = scan(/\n/)
if line_kind
tokens << [:end_line, line_kind]
line_kind = nil
end
tokens << [match, :space]
next
end
case state
when :initial
if match = scan(/--- |\+\+\+ |=+|_+/)
tokens << [:begin_line, line_kind = :head]
tokens << [match, :head]
next unless match = scan(/.+/)
kind = :plain
elsif match = scan(/Index: |Property changes on: /)
tokens << [:begin_line, line_kind = :head]
tokens << [match, :head]
next unless match = scan(/.+/)
kind = :plain
elsif match = scan(/Added: /)
tokens << [:begin_line, line_kind = :head]
tokens << [match, :head]
next unless match = scan(/.+/)
kind = :plain
state = :added
elsif match = scan(/\\ /)
tokens << [:begin_line, line_kind = :change]
tokens << [match, :change]
next unless match = scan(/.+/)
kind = :plain
elsif scan(/(@@)((?>[^@\n]*))(@@)/)
tokens << [:begin_line, line_kind = :change]
tokens << [self[1], :change]
tokens << [self[2], :plain]
tokens << [self[3], :change]
next unless match = scan(/.+/)
kind = :plain
elsif match = scan(/\+/)
tokens << [:begin_line, line_kind = :insert]
tokens << [match, :insert]
next unless match = scan(/.+/)
kind = :plain
elsif match = scan(/-/)
tokens << [:begin_line, line_kind = :delete]
tokens << [match, :delete]
next unless match = scan(/.+/)
kind = :plain
elsif scan(/ .*/)
kind = :comment
elsif scan(/.+/)
tokens << [:begin_line, line_kind = :head]
kind = :plain
else
raise_inspect 'else case rached'
end
when :added
if match = scan(/ \+/)
tokens << [:begin_line, line_kind = :insert]
tokens << [match, :insert]
next unless match = scan(/.+/)
kind = :plain
else
state = :initial
next
end
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
tokens << [:end_line, line_kind] if line_kind
tokens
end
end
end
end
module CodeRay
module Scanners
load :java
class Groovy < Java
include Streamable
register_for :groovy
# TODO: Check this!
GROOVY_KEYWORDS = %w[
as assert def in
]
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
case instanceof new return throw typeof while as assert in
]
GROOVY_MAGIC_VARIABLES = %w[ it ]
IDENT_KIND = Java::IDENT_KIND.dup.
add(GROOVY_KEYWORDS, :keyword).
add(GROOVY_MAGIC_VARIABLES, :local_variable)
ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # no 4-byte unicode chars? U[a-fA-F0-9]{8}
REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x
# TODO: interpretation inside ', ", /
STRING_CONTENT_PATTERN = {
"'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
'"' => /[^\\$"\n]+/,
"'''" => /(?>[^\\']+|'(?!''))+/,
'"""' => /(?>[^\\$"]+|"(?!""))+/,
'/' => /[^\\$\/\n]+/,
}
def scan_tokens tokens, options
state = :initial
inline_block_stack = []
inline_block_paren_depth = nil
string_delimiter = nil
import_clause = class_name_follows = last_token = after_def = false
value_expected = true
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
tokens << [match, :space]
if match.index ?\n
import_clause = after_def = false
value_expected = true unless value_expected
end
next
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
after_def = false
kind = :comment
elsif bol? && scan(/ \#!.* /x)
kind = :doctype
elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
after_def = value_expected = false
kind = :include
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
if last_token == '.'
kind = :ident
elsif class_name_follows
kind = :class
class_name_follows = false
elsif after_def && check(/\s*[({]/)
kind = :method
after_def = false
elsif kind == :ident && last_token != '?' && check(/:/)
kind = :key
else
class_name_follows = true if match == 'class' || (import_clause && match == 'as')
import_clause = match == 'import'
after_def = true if match == 'def'
end
elsif scan(/;/)
import_clause = after_def = false
value_expected = true
kind = :operator
elsif scan(/\{/)
class_name_follows = after_def = false
value_expected = true
kind = :operator
if !inline_block_stack.empty?
inline_block_paren_depth += 1
end
# TODO: ~'...', ~"..." and ~/.../ style regexps
elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
&& | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
value_expected = true
value_expected = :regexp if match == '~'
after_def = false
kind = :operator
elsif match = scan(/ [)\]}] /x)
value_expected = after_def = false
if !inline_block_stack.empty? && match == '}'
inline_block_paren_depth -= 1
if inline_block_paren_depth == 0 # closing brace of inline block reached
tokens << [match, :inline_delimiter]
tokens << [:close, :inline]
state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
next
end
end
elsif check(/[\d.]/)
after_def = value_expected = false
if scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
kind = :float
elsif scan(/\d+[lLgG]?/)
kind = :integer
end
elsif match = scan(/'''|"""/)
after_def = value_expected = false
state = :multiline_string
tokens << [:open, :string]
string_delimiter = match
kind = :delimiter
# TODO: record.'name'
elsif match = scan(/["']/)
after_def = value_expected = false
state = match == '/' ? :regexp : :string
tokens << [:open, state]
string_delimiter = match
kind = :delimiter
elsif value_expected && (match = scan(/\//))
after_def = value_expected = false
tokens << [:open, :regexp]
state = :regexp
string_delimiter = '/'
kind = :delimiter
elsif scan(/ @ #{IDENT} /ox)
after_def = value_expected = false
kind = :annotation
elsif scan(/\//)
after_def = false
value_expected = true
kind = :operator
else
getch
kind = :error
end
when :string, :regexp, :multiline_string
if scan(STRING_CONTENT_PATTERN[string_delimiter])
kind = :content
elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
tokens << [match, :delimiter]
if state == :regexp
# TODO: regexp modifiers? s, m, x, i?
modifiers = scan(/[ix]+/)
tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
end
state = :string if state == :multiline_string
tokens << [:close, state]
string_delimiter = nil
after_def = value_expected = false
state = :initial
next
elsif (state == :string || state == :multiline_string) &&
(match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
kind = :content
else
kind = :char
end
elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif match = scan(/ \$ #{IDENT} /mox)
tokens << [:open, :inline]
tokens << ['$', :inline_delimiter]
match = match[1..-1]
tokens << [match, IDENT_KIND[match]]
tokens << [:close, :inline]
next
elsif match = scan(/ \$ \{ /x)
tokens << [:open, :inline]
tokens << ['${', :inline_delimiter]
inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
inline_block_paren_depth = 1
state = :initial
next
elsif scan(/ \$ /mx)
kind = :content
elsif scan(/ \\. /mx)
kind = :content
elsif scan(/ \\ | \n /x)
tokens << [:close, state]
kind = :error
after_def = value_expected = false
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
last_token = match unless [:space, :comment, :doctype].include? kind
tokens << [match, kind]
end
if [:multiline_string, :string, :regexp].include? state
tokens << [:close, state]
end
tokens
end
end
end
end
......@@ -2,12 +2,17 @@ module CodeRay
module Scanners
# HTML Scanner
#
# $Id$
class HTML < Scanner
include Streamable
register_for :html
KINDS_NOT_LOC = [
:comment, :doctype, :preprocessor,
:tag, :attribute_name, :operator,
:attribute_value, :delimiter, :content,
:plain, :entity, :error
]
ATTR_NAME = /[\w.:-]+/
ATTR_VALUE_UNQUOTED = ATTR_NAME
......@@ -65,14 +70,14 @@ module Scanners
if scan(/<!--.*?-->/m)
kind = :comment
elsif scan(/<!DOCTYPE.*?>/m)
kind = :preprocessor
kind = :doctype
elsif scan(/<\?xml.*?\?>/m)
kind = :preprocessor
elsif scan(/<\?.*?\?>|<%.*?%>/m)
kind = :comment
elsif scan(/<\/[-\w_.:]*>/m)
elsif scan(/<\/[-\w.:]*>/m)
kind = :tag
elsif match = scan(/<[-\w_.:]+>?/m)
elsif match = scan(/<[-\w.:]+>?/m)
kind = :tag
state = :attribute unless match[-1] == ?>
elsif scan(/[^<>&]+/)
......
module CodeRay
module Scanners
class Java < Scanner
include Streamable
register_for :java
helper :builtin_types
# http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
KEYWORDS = %w[
assert break case catch continue default do else
finally for if instanceof import new package
return switch throw try typeof while
debugger export
]
RESERVED = %w[ const goto ]
CONSTANTS = %w[ false null true ]
MAGIC_VARIABLES = %w[ this super ]
TYPES = %w[
boolean byte char class double enum float int interface long
short void
] << '[]' # String[] should be highlighted as a type
DIRECTIVES = %w[
abstract extends final implements native private protected public
static strictfp synchronized throws transient volatile
]
IDENT_KIND = WordList.new(:ident).
add(KEYWORDS, :keyword).
add(RESERVED, :reserved).
add(CONSTANTS, :pre_constant).
add(MAGIC_VARIABLES, :local_variable).
add(TYPES, :type).
add(BuiltinTypes::List, :pre_type).
add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
add(DIRECTIVES, :directive)
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
STRING_CONTENT_PATTERN = {
"'" => /[^\\']+/,
'"' => /[^\\"]+/,
'/' => /[^\\\/]+/,
}
IDENT = /[a-zA-Z_][A-Za-z_0-9]*/
def scan_tokens tokens, options
state = :initial
string_delimiter = nil
import_clause = class_name_follows = last_token_dot = false
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
tokens << [match, :space]
next
elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
tokens << [match, :comment]
next
elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
kind = :include
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
if last_token_dot
kind = :ident
elsif class_name_follows
kind = :class
class_name_follows = false
else
import_clause = true if match == 'import'
class_name_follows = true if match == 'class' || match == 'interface'
end
elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
kind = :operator
elsif scan(/;/)
import_clause = false
kind = :operator
elsif scan(/\{/)
class_name_follows = false
kind = :operator
elsif check(/[\d.]/)
if scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
kind = :float
elsif scan(/\d+[lL]?/)
kind = :integer
end
elsif match = scan(/["']/)
tokens << [:open, :string]
state = :string
string_delimiter = match
kind = :delimiter
elsif scan(/ @ #{IDENT} /ox)
kind = :annotation
else
getch
kind = :error
end
when :string
if scan(STRING_CONTENT_PATTERN[string_delimiter])
kind = :content
elsif match = scan(/["'\/]/)
tokens << [match, :delimiter]
tokens << [:close, state]
string_delimiter = nil
state = :initial
next
elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
kind = :content
else
kind = :char
end
elsif scan(/\\./m)
kind = :content
elsif scan(/ \\ | $ /x)
tokens << [:close, :delimiter]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
last_token_dot = match == '.'
tokens << [match, kind]
end
if state == :string
tokens << [:close, state]
end
tokens
end
end
end
end
module CodeRay
module Scanners
class JavaScript < Scanner
include Streamable
register_for :java_script
file_extension 'js'
# The actual JavaScript keywords.
KEYWORDS = %w[
break case catch continue default delete do else
finally for function if in instanceof new
return switch throw try typeof var void while with
]
PREDEFINED_CONSTANTS = %w[
false null true undefined
]
MAGIC_VARIABLES = %w[ this arguments ] # arguments was introduced in JavaScript 1.4
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
case delete in instanceof new return throw typeof while with
]
# Reserved for future use.
RESERVED_WORDS = %w[
abstract boolean byte char class debugger double enum export extends
final float goto implements import int interface long native package
private protected public short static super synchronized throws transient
volatile
]
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_CONSTANTS, :pre_constant).
add(MAGIC_VARIABLES, :local_variable).
add(KEYWORDS, :keyword)
ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
REGEXP_ESCAPE = / [bBdDsSwW] /x
STRING_CONTENT_PATTERN = {
"'" => /[^\\']+/,
'"' => /[^\\"]+/,
'/' => /[^\\\/]+/,
}
KEY_CHECK_PATTERN = {
"'" => / [^\\']* (?: \\.? [^\\']* )* '? \s* : /x,
'"' => / [^\\"]* (?: \\.? [^\\"]* )* "? \s* : /x,
}
def scan_tokens tokens, options
state = :initial
string_delimiter = nil
value_expected = true
key_expected = false
function_expected = false
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
value_expected = true if !value_expected && match.index(?\n)
tokens << [match, :space]
next
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
kind = :comment
elsif check(/\.?\d/)
key_expected = value_expected = false
if scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
elsif scan(/\d+/)
kind = :integer
end
elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
html_scanner.tokenize match
value_expected = false
next
elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
value_expected = true
last_operator = match[-1]
key_expected = (last_operator == ?{) || (last_operator == ?,)
function_expected = false
kind = :operator
elsif scan(/ [)\]}]+ /x)
function_expected = key_expected = value_expected = false
kind = :operator
elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
if kind == :ident
if match.index(?$) # $ allowed inside an identifier
kind = :predefined
elsif function_expected
kind = :function
elsif check(/\s*[=:]\s*function\b/)
kind = :function
elsif key_expected && check(/\s*:/)
kind = :key
end
end
function_expected = (kind == :keyword) && (match == 'function')
key_expected = false
elsif match = scan(/["']/)
if key_expected && check(KEY_CHECK_PATTERN[match])
state = :key
else
state = :string
end
tokens << [:open, state]
string_delimiter = match
kind = :delimiter
elsif value_expected && (match = scan(/\/(?=\S)/))
tokens << [:open, :regexp]
state = :regexp
string_delimiter = '/'
kind = :delimiter
elsif scan(/ \/ /x)
value_expected = true
key_expected = false
kind = :operator
else
getch
kind = :error
end
when :string, :regexp, :key
if scan(STRING_CONTENT_PATTERN[string_delimiter])
kind = :content
elsif match = scan(/["'\/]/)
tokens << [match, :delimiter]
if state == :regexp
modifiers = scan(/[gim]+/)
tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
end
tokens << [:close, state]
string_delimiter = nil
key_expected = value_expected = false
state = :initial
next
elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
kind = :content
else
kind = :char
end
elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/\\./m)
kind = :content
elsif scan(/ \\ | $ /x)
tokens << [:close, state]
kind = :error
key_expected = value_expected = false
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if [:string, :regexp].include? state
tokens << [:close, state]
end
tokens
end
protected
def reset_instance
super
@html_scanner.reset if defined? @html_scanner
end
def html_scanner
@html_scanner ||= CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
end
end
end
end
module CodeRay
module Scanners
class JSON < Scanner
include Streamable
register_for :json
file_extension 'json'
KINDS_NOT_LOC = [
:float, :char, :content, :delimiter,
:error, :integer, :operator, :value,
]
CONSTANTS = %w( true false null )
IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)
ESCAPE = / [bfnrt\\"\/] /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x
def scan_tokens tokens, options
state = :initial
stack = []
string_delimiter = nil
key_expected = false
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
tokens << [match, :space]
next
elsif match = scan(/ [:,\[{\]}] /x)
kind = :operator
case match
when '{' then stack << :object; key_expected = true
when '[' then stack << :array
when ':' then key_expected = false
when ',' then key_expected = true if stack.last == :object
when '}', ']' then stack.pop # no error recovery, but works for valid JSON
end
elsif match = scan(/ true | false | null /x)
kind = IDENT_KIND[match]
elsif match = scan(/-?(?:0|[1-9]\d*)/)
kind = :integer
if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
match << matched
kind = :float
end
elsif match = scan(/"/)
state = key_expected ? :key : :string
tokens << [:open, state]
kind = :delimiter
else
getch
kind = :error
end
when :string, :key
if scan(/[^\\"]+/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, state]
state = :initial
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/\\./m)
kind = :content
elsif scan(/ \\ | $ /x)
tokens << [:close, :delimiter]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
if [:string, :key].include? state
tokens << [:close, state]
end
tokens
end
end
end
end
......@@ -5,13 +5,15 @@ module Scanners
load :ruby
# Nitro XHTML Scanner
#
# $Id$
class NitroXHTML < Scanner
include Streamable
register_for :nitro_xhtml
file_extension :xhtml
title 'Nitro XHTML'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
NITRO_RUBY_BLOCK = /
<\?r
(?>
......
This diff is collapsed.
......@@ -4,9 +4,12 @@ module Scanners
class Plaintext < Scanner
register_for :plaintext, :plain
title 'Plain text'
include Streamable
KINDS_NOT_LOC = [:plain]
def scan_tokens tokens, options
text = (scan_until(/\z/) || '')
tokens << [text, :plain]
......
This diff is collapsed.
......@@ -5,12 +5,13 @@ module Scanners
load :ruby
# RHTML Scanner
#
# $Id$
class RHTML < Scanner
include Streamable
register_for :rhtml
title 'HTML ERB Template'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
ERB_RUBY_BLOCK = /
<%(?!%)[=-]?
......
......@@ -21,6 +21,10 @@ module Scanners
file_extension 'rb'
helper :patterns
if not defined? EncodingError
EncodingError = Class.new Exception
end
private
def scan_tokens tokens, options
......@@ -31,9 +35,10 @@ module Scanners
state = :initial
depth = nil
inline_block_stack = []
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
patterns = Patterns # avoid constant lookup
until eos?
match = nil
kind = nil
......@@ -124,14 +129,15 @@ module Scanners
# {{{
if match = scan(/[ \t\f]+/)
kind = :space
match << scan(/\s*/) unless eos? or heredocs
match << scan(/\s*/) unless eos? || heredocs
value_expected = true if match.index(?\n)
tokens << [match, kind]
next
elsif match = scan(/\\?\n/)
kind = :space
if match == "\n"
value_expected = true # FIXME not quite true
value_expected = true
state = :initial if state == :undef_comma_expected
end
if heredocs
......@@ -146,17 +152,21 @@ module Scanners
tokens << [match, kind]
next
elsif bol? && match = scan(/\#!.*/)
tokens << [match, :doctype]
next
elsif match = scan(/\#.*/) or
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
kind = :comment
value_expected = true
tokens << [match, kind]
next
elsif state == :initial
# IDENTS #
if match = scan(/#{patterns::METHOD_NAME}/o)
if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
/#{patterns::METHOD_NAME}/o)
if last_token_dot
kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
else
......@@ -165,13 +175,12 @@ module Scanners
kind = :constant
elsif kind == :reserved
state = patterns::DEF_NEW_STATE[match]
value_expected = :set if patterns::VALUE_EXPECTING_KEYWORDS[match]
end
end
## experimental!
value_expected = :set if
patterns::REGEXP_ALLOWED[match] or check(/#{patterns::VALUE_FOLLOWS}/o)
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
kind = :ident
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
......@@ -190,6 +199,7 @@ module Scanners
depth -= 1
if depth == 0 # closing brace of inline block reached
state, depth, heredocs = inline_block_stack.pop
heredocs = nil if heredocs && heredocs.empty?
tokens << [match, :inline_delimiter]
kind = :inline
match = :close
......@@ -211,8 +221,9 @@ module Scanners
interpreted = true
state = patterns::StringState.new :regexp, interpreted, match
elsif match = scan(/#{patterns::NUMERIC}/o)
kind = if self[1] then :float else :integer end
# elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
kind = self[1] ? :float : :integer
elsif match = scan(/#{patterns::SYMBOL}/o)
case delim = match[1]
......@@ -274,18 +285,41 @@ module Scanners
else
kind = :error
match = getch
match = (scan(/./mu) rescue nil) || getch
if !unicode && match.size > 1
# warn 'Switchig to unicode mode: %p' % ['ä'[/#{patterns::METHOD_NAME}/uo]]
unicode = true
unscan
next
end
end
elsif state == :def_expected
state = :initial
if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
if scan(/self\./)
tokens << ['self', :pre_constant]
tokens << ['.', :operator]
end
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
kind = :method
else
next
end
elsif state == :module_expected
if match = scan(/<</)
kind = :operator
else
state = :initial
if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
kind = :class
else
next
end
end
elsif state == :undef_expected
state = :undef_comma_expected
if match = scan(/#{patterns::METHOD_NAME_EX}/o)
......@@ -307,6 +341,22 @@ module Scanners
next
end
elsif state == :alias_expected
begin
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
rescue EncodingError
raise if $DEBUG
end
if match
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
tokens << [self[2], :space]
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
end
state = :initial
next
elsif state == :undef_comma_expected
if match = scan(/,/)
kind = :operator
......@@ -316,24 +366,14 @@ module Scanners
next
end
elsif state == :module_expected
if match = scan(/<</)
kind = :operator
else
state = :initial
if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
kind = :class
else
next
end
end
end
# }}}
value_expected = value_expected == :set
last_token_dot = last_token_dot == :set
unless kind == :error
value_expected = value_expected == :set
last_token_dot = last_token_dot == :set
end
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens, state
......
......@@ -6,7 +6,7 @@ module CodeRay
class Scheme < Scanner
register_for :scheme
file_extension :scm
file_extension 'scm'
CORE_FORMS = %w[
lambda let let* letrec syntax-case define-syntax let-syntax
......
This diff is collapsed.
......@@ -5,13 +5,12 @@ module Scanners
# XML Scanner
#
# $Id$
#
# Currently this is the same scanner as Scanners::HTML.
class XML < HTML
register_for :xml
file_extension 'xml'
end
end
......
This diff is collapsed.
......@@ -84,6 +84,9 @@ ol.CodeRay li { white-space: pre; }
.pp { color:#579; }
.pt { color:#66f; font-weight:bold; }
.r { color:#5de; font-weight:bold; }
.r, .kw { color:#5de; font-weight:bold }
.ke { color: #808; }
.rx { background-color:#221133; }
.rx .k { color:#f8f; }
......@@ -111,6 +114,16 @@ ol.CodeRay li { white-space: pre; }
.ty { color:#339; font-weight:bold; }
.v { color:#036; }
.xt { color:#444; }
.ins { background: #afa; }
.del { background: #faa; }
.chg { color: #aaf; background: #007; }
.head { color: #f8f; background: #505 }
.ins .ins { color: #080; font-weight:bold }
.del .del { color: #800; font-weight:bold }
.chg .chg { color: #66f; }
.head .head { color: #f4f; }
TOKENS
end
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment