Commit 00977706 authored by Jean-Philippe Lang's avatar Jean-Philippe Lang

Upgrade CodeRay to 0.9.2 (#3359).

git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@3592 e93f8b46-1217-0410-a6f0-8f06a7374b81
parent e6c8760a
= CodeRay - Trunk folder structure
== bench - Benchmarking system
All benchmarking stuff goes here.
Test inputs are stored in files named <code>example.<lang></code>.
Test outputs go to <code>bench/test.<encoder-default-file-extension></code>.
Run <code>bench/bench.rb</code> to get a usage description.
Run <code>rake bench</code> to perform an example benchmark.
== bin - Scripts
Executable files for CodeRay.
== demo - Demos and functional tests
Demonstration scripts that show off CodeRay's features.
Run them as functional tests with <code>rake test:demos</code>.
== etc - Lots of stuff
Some additional files for CodeRay, mainly graphics and Vim scripts.
== gem_server - Gem output folder
For <code>rake gem</code>.
== lib - CodeRay library code
This is the base directory for the CodeRay library.
== rake_helpers - Rake helper libraries
Some files to enhance Rake, including the Autumnal Rdoc template and some scripts.
== test - Tests
Tests for the scanners.
Each language has its own subfolder and sub-suite.
Run with <code>rake test</code>.
#!/usr/bin/env ruby
# CodeRay Executable
#
# Version: 0.1
# Author: murphy
# Writes +msg+ to the standard error stream, followed by a newline.
#
# +msg+ is handed to IO#puts unchanged, so an Array is printed one
# element per line. Returns whatever IO#puts returns (nil).
def err(msg)
  $stderr.puts(msg)
end
# Command-line driver.
#
#   coderay -<lang> [-<format>] < file > output   # highlight standard input
#   coderay file [-<format>]                      # highlight a file, write file.<ext>
#
# Any StandardError is reported on standard error together with the
# backtrace and ARGV, and the process exits with status 1.
begin
  require 'coderay'

  if ARGV.empty?
    puts <<-USAGE
CodeRay #{CodeRay::VERSION} (http://rd.cYcnus.de/coderay)
Usage:
coderay -<lang> [-<format>] < file > output
coderay file [-<format>]
Example:
coderay -ruby -statistic < foo.rb
coderay codegen.c # generates codegen.c.html
    USAGE
  end

  first, second = ARGV

  # First argument: either "-<lang>" (read the code from standard input)
  # or the name of a file to scan.
  if first
    if first[/-(\w+)/] == first
      lang = $1.to_sym
      input = $stdin.read
      tokens = :scan
    elsif first == '-'
      # A bare "-" carries no language name, so there is nothing to build a
      # scanner from. Fail with a clear message instead of calling #to_sym
      # on a nil match group.
      raise 'No language given (must be -<lang>).'
    else
      file = first
      tokens = CodeRay.scan_file file
      output_filename = file
    end
  else
    puts 'No lang/file given.'
    exit 1
  end

  # Second argument: optional "-<format>" naming the encoder.
  if second
    if second[/-(\w+)/] == second
      format = $1.to_sym
    else
      raise 'Invalid format (must be -xxx).'
    end
  else
    $stderr.puts 'No format given; setting to default (HTML Page)'
    format = :page
  end

  # TODO: allow streaming
  if tokens == :scan
    output = CodeRay::Duo[lang => format].highlight input #, :stream => true
  else
    output = tokens.encode format
  end

  if output_filename
    # File input: write next to the input file, using the encoder's extension.
    output_filename += '.' + CodeRay::Encoders[format]::FILE_EXTENSION
    if File.exist? output_filename
      err 'File %s already exists.' % output_filename
      # Refusing to overwrite is a failure, so signal it to the caller.
      exit 1
    end
    # The block form closes (and flushes) the file even if printing raises.
    File.open(output_filename, 'w') { |io| io.print output }
  else
    $stdout.print output
  end
rescue => boom
  err "Error: #{boom.message}\n"
  err boom.backtrace
  err '-' * 50
  err ARGV
  exit 1
end
#!/usr/bin/env ruby
# Prints the stylesheet returned by a new instance of the HTML encoder's
# CSS class to standard output.
require 'coderay'
puts CodeRay::Encoders[:html]::CSS.new.stylesheet
module CodeRay
  module Encoders
    # Encoder that renders tokens in a simple, readable, line-oriented
    # format. It uses no colors and is mainly meant for console output.
    #
    # Every token is converted with CodeRay::Tokens.write_token; the
    # resulting format is:
    #
    #   <token-kind> \t <escaped token-text> \n
    #
    # Example:
    #
    #   require 'coderay'
    #   puts CodeRay.scan("puts 3 + 4", :ruby).tokens
    #
    # prints one line per token, e.g. the kinds ident, space, integer,
    # space, operator, space, integer, each followed by the token text.
    class Tokens < Encoder
      include Streamable
      register_for :tokens

      FILE_EXTENSION = 'tok'

      protected

      # Appends the written form of a single token to the output buffer.
      def token(text, kind)
        written = CodeRay::Tokens.write_token(text, kind)
        @out << written
      end
    end
  end
end
# Scanner for Java source code, registered under the :java language key.
module CodeRay
module Scanners
class Java < Scanner
register_for :java
# Words reported with the :reserved token kind.
RESERVED_WORDS = %w(abstract assert break case catch class
const continue default do else enum extends final finally for
goto if implements import instanceof interface native new
package private protected public return static strictfp super switch
synchronized this throw throws transient try void volatile while)
# Primitive type names, reported as :pre_type.
PREDEFINED_TYPES = %w(boolean byte char double float int long short)
# Literal constants, reported as :pre_constant.
PREDEFINED_CONSTANTS = %w(true false null)
# Maps an identifier to its token kind; words not listed fall back to :ident.
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(PREDEFINED_CONSTANTS, :pre_constant)
# What may follow a backslash: a single escape character, "x" plus one or
# two hex digits, or one to three octal digits.
ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
# "u" plus four hex digits or "U" plus eight hex digits.
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
# Scans the input until the end of the string, appending [text, kind]
# pairs (and [:open, kind] / [:close, kind] group markers) to +tokens+.
#
# The scanner is a two-state machine: :initial for ordinary code and
# :string while inside a double-quoted string literal. The order of the
# branches matters, because the first matching pattern wins.
#
# +options+ is not referenced in this method. Returns +tokens+.
def scan_tokens tokens, options
state = :initial
until eos?
kind = nil
match = nil
case state
when :initial
if scan(/ \s+ | \\\n /x)
kind = :space
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
# NOTE(review): "# if 0" blocks look like C preprocessor syntax rather
# than Java - confirm whether this branch is intended here.
elsif match = scan(/ \# \s* if \s* 0 /x)
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x)
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
# A plain identifier directly followed by a single colon is a label;
# the colon becomes part of the token.
if kind == :ident and check(/:(?!:)/)
match << scan(/:/)
kind = :label
end
# NOTE(review): the optional L prefix looks like a C wide-string
# marker - confirm whether it is wanted for Java.
elsif match = scan(/L?"/)
tokens << [:open, :string]
if match[0] == ?L
tokens << ['L', :modifier]
match = '"'
end
state = :string
kind = :delimiter
# An "@" and the rest of the line are reported as :preprocessor.
elsif scan(%r! \@ .* !x)
kind = :preprocessor
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])/)
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
kind = :float
else
# Nothing matched: consume one character and flag it as an error.
getch
kind = :error
end
when :string
if scan(/[^\\\n"]+/)
kind = :content
elsif scan(/"/)
# Closing quote: emit the delimiter and close the group directly, then
# skip the generic token append at the bottom of the loop.
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
# Stray backslash or end of line inside the string: close the string
# group and report an error token.
tokens << [:close, :string]
kind = :error
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
# Branches that did not capture the text themselves use the last match.
match ||= matched
if $DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
# Close a string group that is still open at the end of the input.
if state == :string
tokens << [:close, :string]
end
tokens
end
end
end
end
# http://pastie.textmate.org/50774/
module CodeRay
  module Scanners
    # Scanner for JavaScript source code, registered under :javascript.
    #
    # The scanner is a small state machine with three states:
    # :initial:: ordinary code
    # :regex::   inside a /.../ regular expression literal
    # :string::  inside a single- or double-quoted string literal
    class JavaScript < Scanner
      register_for :javascript

      # Words reported with the :reserved token kind.
      RESERVED_WORDS = [
        'asm', 'break', 'case', 'continue', 'default', 'do', 'else',
        'for', 'goto', 'if', 'return', 'switch', 'while',
        'typeof',
        'var', 'function', 'try', 'new', 'in',
        'instanceof', 'throw', 'catch'
      ]

      # Words reported with the :pre_constant token kind.
      PREDEFINED_CONSTANTS = [
        'void', 'null', 'this',
        'true', 'false', 'undefined',
      ]

      # Maps an identifier to its token kind; unknown words are :ident.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_CONSTANTS, :pre_constant)

      # What may follow a backslash inside a string: a single escape
      # character, "x" plus 1-2 hex digits, or 1-3 octal digits.
      ESCAPE = / [rbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
      # "u" plus four hex digits or "U" plus eight hex digits.
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # Scans the whole input, appending [text, kind] pairs and
      # [:open, kind] / [:close, kind] group markers to +tokens+.
      #
      # +tokens+::  the token collection to append to; it is also returned.
      # +options+:: not referenced by this method.
      #
      # Every group that is opened is also closed: on the regular closing
      # delimiter, when a string is aborted by a stray backslash or line
      # end, and when the input ends inside a string or regexp.
      #
      # Reaching an impossible state is reported through raise_inspect.
      def scan_tokens tokens, options
        state = :initial
        string_type = nil      # the quote character that opened the current string
        regexp_allowed = true  # whether a "/" here starts a regexp rather than a division

        until eos?
          kind = :error
          match = nil

          if state == :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space
            elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
              kind = :comment
              regexp_allowed = false
            elsif match = scan(/ \# \s* if \s* 0 /x)
              match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
              kind = :comment
              regexp_allowed = false
            elsif regexp_allowed && scan(/\//)
              tokens << [:open, :regexp]
              state = :regex
              kind = :delimiter
            elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
              kind = :operator
              regexp_allowed = true
            elsif match = scan(/ [$A-Za-z_][A-Za-z_0-9]* /x)
              kind = IDENT_KIND[match]
              regexp_allowed = false
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              string_type = matched
              state = :string
              kind = :delimiter
            elsif scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
              regexp_allowed = false
            elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
              kind = :oct
              regexp_allowed = false
            elsif scan(/(?:\d+)(?![.eEfF])/)
              kind = :integer
              regexp_allowed = false
            elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              kind = :float
              regexp_allowed = false
            else
              # Nothing matched: consume one character; kind stays :error.
              getch
            end

          elsif state == :regex
            if scan(/[^\\\/]+/)
              kind = :content
            elsif scan(/\\\/|\\\\/)
              kind = :content
            elsif scan(/\//)
              # Closing slash: emit delimiter and group end directly and skip
              # the generic append at the bottom of the loop.
              tokens << [matched, :delimiter]
              tokens << [:close, :regexp]
              state = :initial
              next
            else
              getch
              kind = :content
            end

          elsif state == :string
            if scan(/[^\\"']+/)
              kind = :content
            elsif scan(/["']/)
              if string_type == matched
                tokens << [matched, :delimiter]
                tokens << [:close, :string]
                state = :initial
                string_type = nil
                next
              else
                # The other quote character is ordinary content.
                kind = :content
              end
            elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
              kind = :char
            elsif scan(/ \\ | $ /x)
              # The string is abandoned here, so its group has to be closed
              # before the error token; otherwise everything that follows
              # stays nested inside the string group.
              tokens << [:close, :string]
              kind = :error
              state = :initial
              string_type = nil
            else
              # Kernel#raise rejects a String followed by a second argument,
              # so the scanner's raise_inspect helper is used to report this.
              raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
            end

          else
            raise_inspect 'else-case reached', tokens
          end

          # Branches that did not capture the text themselves use the last match.
          match ||= matched
          tokens << [match, kind]
        end

        # Close a group that is still open when the input ends.
        case state
        when :string
          tokens << [:close, :string]
        when :regex
          tokens << [:close, :regexp]
        end

        tokens
      end
    end
  end
end
\ No newline at end of file
module CodeRay
  module Scanners
    # Scanner for PHP source code, registered under :php.
    #
    # The scanner is a small state machine with four states:
    # :waiting_php:: outside of PHP code, before a "<?php" opening tag
    # :initial::     ordinary PHP code
    # :regex::       inside a /.../ literal
    # :string::      inside a single- or double-quoted string literal
    class PHP < Scanner
      register_for :php

      # Words reported with the :reserved token kind.
      RESERVED_WORDS = [
        'and', 'or', 'xor', '__FILE__', 'exception', '__LINE__', 'array', 'as', 'break', 'case',
        'class', 'const', 'continue', 'declare', 'default',
        'die', 'do', 'echo', 'else', 'elseif',
        'empty', 'enddeclare', 'endfor', 'endforeach', 'endif',
        'endswitch', 'endwhile', 'eval', 'exit', 'extends',
        'for', 'foreach', 'function', 'global', 'if',
        'include', 'include_once', 'isset', 'list', 'new',
        'print', 'require', 'require_once', 'return', 'static',
        'switch', 'unset', 'use', 'var', 'while',
        '__FUNCTION__', '__CLASS__', '__METHOD__', 'final', 'php_user_filter',
        'interface', 'implements', 'extends', 'public', 'private',
        'protected', 'abstract', 'clone', 'try', 'catch',
        'throw', 'cfunction', 'old_function'
      ]

      # Words reported with the :pre_constant token kind.
      PREDEFINED_CONSTANTS = [
        'null', '$this', 'true', 'false'
      ]

      # Maps an identifier to its token kind; unknown words are :ident.
      IDENT_KIND = WordList.new(:ident).
        add(RESERVED_WORDS, :reserved).
        add(PREDEFINED_CONSTANTS, :pre_constant)

      # Escape patterns; not referenced by scan_tokens below, kept because
      # they are part of the class's constants.
      ESCAPE = / [\$\wrbfnrtv\n\\\/'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
      UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

      # Scans the whole input, appending [text, kind] pairs and
      # [:open, kind] / [:close, kind] group markers to +tokens+.
      #
      # +tokens+::  the token collection to append to; it is also returned.
      # +options+:: not referenced by this method.
      #
      # Scanning starts in :waiting_php, where everything up to the next
      # "<?php" tag is reported as :comment. Every group that is opened is
      # also closed: on the regular closing delimiter, when a string is
      # aborted by a stray backslash or line end, and when the input ends
      # inside a string or regexp.
      #
      # Reaching an impossible state is reported through raise_inspect.
      def scan_tokens tokens, options
        state = :waiting_php
        string_type = nil      # the quote character that opened the current string
        regexp_allowed = true  # whether a "/" here opens a /.../ literal

        until eos?
          kind = :error
          match = nil

          if state == :initial
            if scan(/ \s+ | \\\n /x)
              kind = :space
            elsif scan(/\?>/)
              # Closing tag: back to waiting for the next "<?php".
              kind = :char
              state = :waiting_php
            elsif scan(%r{ (//|\#) [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) }mx)
              # This rule consumes every "#", so no separate "# if 0"
              # branch can ever be reached after it.
              kind = :comment
              regexp_allowed = false
            elsif regexp_allowed && scan(/\//)
              tokens << [:open, :regexp]
              state = :regex
              kind = :delimiter
            elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%] | \.(?!\d) /x)
              kind = :operator
              regexp_allowed = true
            elsif match = scan(/ [$@A-Za-z_][A-Za-z_0-9]* /x)
              kind = IDENT_KIND[match]
              regexp_allowed = false
            elsif match = scan(/["']/)
              tokens << [:open, :string]
              string_type = matched
              state = :string
              kind = :delimiter
            elsif scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
              regexp_allowed = false
            elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
              kind = :oct
              regexp_allowed = false
            elsif scan(/(?:\d+)(?![.eEfF])/)
              kind = :integer
              regexp_allowed = false
            elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              kind = :float
              regexp_allowed = false
            else
              # Nothing matched: consume one character; kind stays :error.
              getch
            end

          elsif state == :regex
            if scan(/[^\\\/]+/)
              kind = :content
            elsif scan(/\\\/|\\/)
              kind = :content
            elsif scan(/\//)
              # Closing slash: emit delimiter and group end directly and skip
              # the generic append at the bottom of the loop.
              tokens << [matched, :delimiter]
              tokens << [:close, :regexp]
              state = :initial
              next
            else
              getch
              kind = :content
            end

          elsif state == :string
            if scan(/[^\\"']+/)
              kind = :content
            elsif scan(/["']/)
              if string_type == matched
                tokens << [matched, :delimiter]
                tokens << [:close, :string]
                state = :initial
                string_type = nil
                next
              else
                # The other quote character is ordinary content.
                kind = :content
              end
            elsif scan(/ \\ (?: \S ) /mox)
              kind = :char
            elsif scan(/ \\ | $ /x)
              # The string is abandoned here, so its group has to be closed
              # before the error token; otherwise everything that follows
              # stays nested inside the string group.
              tokens << [:close, :string]
              kind = :error
              state = :initial
              string_type = nil
            else
              # Kernel#raise rejects a String followed by a second argument,
              # so the scanner's raise_inspect helper is used to report this.
              raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
            end

          elsif state == :waiting_php
            if scan(/<\?php/m)
              kind = :char
              state = :initial
            elsif scan(/[^<]+/)
              kind = :comment
            else
              # A "<" that does not start "<?php".
              kind = :comment
              getch
            end

          else
            raise_inspect 'else-case reached', tokens
          end

          # Branches that did not capture the text themselves use the last match.
          match ||= matched
          tokens << [match, kind]
        end

        # Close a group that is still open when the input ends.
        case state
        when :string
          tokens << [:close, :string]
        when :regex
          tokens << [:close, :regexp]
        end

        tokens
      end
    end
  end
end
\ No newline at end of file
= CodeRay = CodeRay
[- Tired of blue'n'gray? Try the original version of this documentation on [- Tired of blue'n'gray? Try the original version of this documentation on
http://rd.cYcnus.de/coderay/doc (use Ctrl+Click to open it in its own frame.) -] coderay.rubychan.de[http://coderay.rubychan.de/doc/] (use Ctrl+Click to open it in its own frame.) -]
== About == About
CodeRay is a Ruby library for syntax highlighting. CodeRay is a Ruby library for syntax highlighting.
...@@ -18,14 +18,11 @@ And with line numbers. ...@@ -18,14 +18,11 @@ And with line numbers.
* is what everybody should have on their website * is what everybody should have on their website
* solves all your problems and makes the girls run after you * solves all your problems and makes the girls run after you
Version: 0.7.4 (2006.october.20) Version: 0.9.2
Author:: murphy (Kornelius Kalnbach) Author:: murphy (Kornelius Kalnbach)
Contact:: murphy rubychan de Contact:: murphy rubychan de
Website:: coderay.rubychan.de[http://coderay.rubychan.de] Website:: coderay.rubychan.de[http://coderay.rubychan.de]
License:: GNU LGPL; see LICENSE file in the main directory. License:: GNU LGPL; see LICENSE file in the main directory.
Subversion:: $Id: README 219 2006-10-20 15:52:25Z murphy $
-----
== Installation == Installation
...@@ -33,17 +30,10 @@ You need RubyGems[http://rubyforge.org/frs/?group_id=126]. ...@@ -33,17 +30,10 @@ You need RubyGems[http://rubyforge.org/frs/?group_id=126].
% gem install coderay % gem install coderay
Since CodeRay is still in beta stage, nightly buildy may be useful:
% gem install coderay -rs rd.cYcnus.de/coderay
=== Dependencies === Dependencies
CodeRay needs Ruby 1.8 and the CodeRay needs Ruby 1.8.6 or later. It also runs with Ruby 1.9.1+ and JRuby 1.1+.
strscan[http://www.ruby-doc.org/stdlib/libdoc/strscan/rdoc/index.htm]
library (part of the standard library.) It should also run with Ruby 1.9 and
yarv.
== Example Usage == Example Usage
...@@ -60,11 +50,9 @@ yarv. ...@@ -60,11 +50,9 @@ yarv.
See CodeRay. See CodeRay.
Please report errors in this documentation to <coderay cycnus de>. Please report errors in this documentation to <murphy rubychan de>.
-----
== Credits == Credits
=== Special Thanks to === Special Thanks to
...@@ -72,30 +60,43 @@ Please report errors in this documentation to <coderay cycnus de>. ...@@ -72,30 +60,43 @@ Please report errors in this documentation to <coderay cycnus de>.
* licenser (Heinz N. Gies) for ending my QBasic career, inventing the Coder * licenser (Heinz N. Gies) for ending my QBasic career, inventing the Coder
project and the input/output plugin system. project and the input/output plugin system.
CodeRay would not exist without him. CodeRay would not exist without him.
* bovi (Daniel Bovensiepen) for helping me out on various occasions.
=== Thanks to === Thanks to
* Caleb Clausen for writing RubyLexer (see * Caleb Clausen for writing RubyLexer (see
http://rubyforge.org/projects/rubylexer) and lots of very interesting mail http://rubyforge.org/projects/rubylexer) and lots of very interesting mail
traffic traffic
* birkenfeld (Georg Brandl) and mitsuhiku (Arnim Ronacher) for PyKleur. You * birkenfeld (Georg Brandl) and mitsuhiku (Arnim Ronacher) for PyKleur, now pygments.
guys rock! You guys rock!
* Jamis Buck for writing Syntax (see http://rubyforge.org/projects/syntax) * Jamis Buck for writing Syntax (see http://rubyforge.org/projects/syntax)
I got some useful ideas from it. I got some useful ideas from it.
* Doug Kearns and everyone else who worked on ruby.vim - it not only helped me * Doug Kearns and everyone else who worked on ruby.vim - it not only helped me
coding CodeRay, but also gave me a wonderful target to reach for the Ruby coding CodeRay, but also gave me a wonderful target to reach for the Ruby
scanner. scanner.
* everyone who used CodeBB on http://www.rubyforen.de and * everyone who uses CodeBB on http://www.rubyforen.de and http://www.python-forum.de
http://www.infhu.de/mx * iGEL, magichisoka, manveru, WoNáDo and everyone I forgot from rubyforen.de
* iGEL, magichisoka, manveru, WoNDo and everyone I forgot from rubyforen.de * Dethix from ruby-mine.de
* Daniel and Dethix from ruby-mine.de * zickzackw
* Dookie (who is no longer with us...) and Leonidas from * Dookie (who is no longer with us...) and Leonidas from http://www.python-forum.de
http://www.python-forum.de
* Andreas Schwarz for finding out that CaseIgnoringWordList was not case * Andreas Schwarz for finding out that CaseIgnoringWordList was not case
ignoring! Such things really make you write tests. ignoring! Such things really make you write tests.
* closure for the first version of the Scheme scanner.
* Stefan Walk for the first version of the JavaScript and PHP scanners.
* Josh Goebel for another version of the JavaScript scanner, a SQL and a Diff scanner.
* Jonathan Younger for pointing out the licence confusion caused by wrong LICENSE file.
* Jeremy Hinegardner for finding the shebang-on-empty-file bug in FileType.
* Charles Oliver Nutter and Yehuda Katz for helping me benchmark CodeRay on JRuby.
* Andreas Neuhaus for pointing out a markup bug in coderay/for_redcloth.
* 0xf30fc7 for the FileType patch concerning Delphi file extensions.
* The folks at redmine.org - thank you for using and fixing CodeRay!
* Keith Pitt for his SQL scanners
* Rob Aldred for the terminal encoder
* Trans for pointing out $DEBUG dependencies
* Flameeyes for finding that Term::ANSIColor was obsolete
* matz and all Ruby gods and gurus * matz and all Ruby gods and gurus
* The inventors of: the computer, the internet, the true color display, HTML & * The inventors of: the computer, the internet, the true color display, HTML &
CSS, VIM, RUBY, pizza, microwaves, guitars, scouting, programming, anime, CSS, VIM, Ruby, pizza, microwaves, guitars, scouting, programming, anime,
manga, coke and green ice tea. manga, coke and green ice tea.
Where would we be without all those people? Where would we be without all those people?
...@@ -103,23 +104,27 @@ Where would we be without all those people? ...@@ -103,23 +104,27 @@ Where would we be without all those people?
=== Created using === Created using
* Ruby[http://ruby-lang.org/] * Ruby[http://ruby-lang.org/]
* Chihiro (my Sony VAIO laptop), Henrietta (my new MacBook) and * Chihiro (my Sony VAIO laptop); Henrietta (my old MacBook);
Seras (my Athlon 2200+ tower) Triella, born Rico (my new MacBook); as well as
* VIM[http://vim.org] and TextMate[http://macromates.com] Seras and Hikari (my PCs)
* RDE[http://homepage2.nifty.com/sakazuki/rde_e.html] * RDE[http://homepage2.nifty.com/sakazuki/rde_e.html],
* Microsoft Windows (yes, I confess!) and MacOS X VIM[http://vim.org] and TextMate[http://macromates.com]
* Firefox[http://www.mozilla.org/products/firefox/] and * Subversion[http://subversion.tigris.org/]
* Redmine[http://redmine.org/]
* Firefox[http://www.mozilla.org/products/firefox/],
Firebug[http://getfirebug.com/], Safari[http://www.apple.com/safari/], and
Thunderbird[http://www.mozilla.org/products/thunderbird/] Thunderbird[http://www.mozilla.org/products/thunderbird/]
* Rake[http://rake.rubyforge.org/] * RubyGems[http://docs.rubygems.org/] and Rake[http://rake.rubyforge.org/]
* RubyGems[http://docs.rubygems.org/] * TortoiseSVN[http://tortoisesvn.tigris.org/] using Apache via
* {Subversion/TortoiseSVN}[http://tortoisesvn.tigris.org/] using Apache via
XAMPP[http://www.apachefriends.org/en/xampp.html] XAMPP[http://www.apachefriends.org/en/xampp.html]
* RDoc (though I'm quite unsatisfied with it) * RDoc (though I'm quite unsatisfied with it)
* Microsoft Windows (yes, I confess!) and MacOS X
* GNUWin32, MinGW and some other tools to make the shell under windows a bit * GNUWin32, MinGW and some other tools to make the shell under windows a bit
more useful less useless
* Term::ANSIColor[http://term-ansicolor.rubyforge.org/] * Term::ANSIColor[http://term-ansicolor.rubyforge.org/]
* PLEAC[http://pleac.sourceforge.net/] code examples
--- === Free
* As you can see, CodeRay was created under heavy use of *free* software. * As you can see, CodeRay was created under heavy use of *free* software.
* So CodeRay is also *free*. * So CodeRay is also *free*.
......
# = CodeRay Library # = CodeRay Library
# #
# $Id: coderay.rb 227 2007-04-24 12:26:18Z murphy $
#
# CodeRay is a Ruby library for syntax highlighting. # CodeRay is a Ruby library for syntax highlighting.
# #
# I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete, # I try to make CodeRay easy to use and intuitive, but at the same time fully featured, complete,
...@@ -16,7 +14,7 @@ ...@@ -16,7 +14,7 @@
# #
# Here's a fancy graphic to light up this gray docu: # Here's a fancy graphic to light up this gray docu:
# #
# http://rd.cYcnus.de/coderay/scheme.png # http://cycnus.de/raindark/coderay/scheme.png
# #
# == Documentation # == Documentation
# #
...@@ -107,7 +105,7 @@ ...@@ -107,7 +105,7 @@
# #
# CodeRay.scan_stream:: Scan in stream mode. # CodeRay.scan_stream:: Scan in stream mode.
# #
# == All-in-One Encoding # == All-in-One Encoding
# #
# CodeRay.encode:: Highlight a string with a given input and output format. # CodeRay.encode:: Highlight a string with a given input and output format.
# #
...@@ -128,15 +126,18 @@ ...@@ -128,15 +126,18 @@
# If you want to re-use scanners and encoders (because that is faster), see # If you want to re-use scanners and encoders (because that is faster), see
# CodeRay::Duo for the most convenient (and recommended) interface. # CodeRay::Duo for the most convenient (and recommended) interface.
module CodeRay module CodeRay
$CODERAY_DEBUG ||= false
# Version: Major.Minor.Teeny[.Revision] # Version: Major.Minor.Teeny[.Revision]
# Major: 0 for pre-release # Major: 0 for pre-stable, 1 for stable
# Minor: odd for beta, even for stable # Minor: feature milestone
# Teeny: development state # Teeny: development state, 0 for pre-release
# Revision: Subversion Revision number (generated on rake) # Revision: Subversion Revision number (generated on rake gem:make)
VERSION = '0.7.6' VERSION = '0.9.2'
require 'coderay/tokens' require 'coderay/tokens'
require 'coderay/token_classes'
require 'coderay/scanner' require 'coderay/scanner'
require 'coderay/encoder' require 'coderay/encoder'
require 'coderay/duo' require 'coderay/duo'
...@@ -315,6 +316,7 @@ end ...@@ -315,6 +316,7 @@ end
# Run a test script. # Run a test script.
if $0 == __FILE__ if $0 == __FILE__
$stderr.print 'Press key to print demo.'; gets $stderr.print 'Press key to print demo.'; gets
# Just use this file as an example of Ruby code.
code = File.read(__FILE__)[/module CodeRay.*/m] code = File.read(__FILE__)[/module CodeRay.*/m]
print CodeRay.scan(code, :ruby).html print CodeRay.scan(code, :ruby).html
end end
...@@ -2,8 +2,6 @@ module CodeRay ...@@ -2,8 +2,6 @@ module CodeRay
# = Duo # = Duo
# #
# $Id: scanner.rb 123 2006-03-21 14:46:34Z murphy $
#
# A Duo is a convenient way to use CodeRay. You just create a Duo, # A Duo is a convenient way to use CodeRay. You just create a Duo,
# giving it a lang (language of the input code) and a format (desired # giving it a lang (language of the input code) and a format (desired
# output format), and call Duo#highlight with the code. # output format), and call Duo#highlight with the code.
......
require "stringio"
module CodeRay module CodeRay
# This module holds the Encoder class and its subclasses. # This module holds the Encoder class and its subclasses.
...@@ -126,36 +124,66 @@ module CodeRay ...@@ -126,36 +124,66 @@ module CodeRay
@out = '' @out = ''
end end
# Called with +text+ and +kind+ of the currently scanned token. # Called with +content+ and +kind+ of the currently scanned token.
# For simple scanners, it's enougth to implement this method. # For simple scanners, it's enougth to implement this method.
# #
# By default, it calls text_token or block_token, depending on # By default, it calls text_token or block_token, depending on
# whether +text+ is a String. # whether +content+ is a String.
def token text, kind def token content, kind
out = encoded_token =
if text.is_a? ::String # Ruby 1.9: :open.is_a? String if content.is_a? ::String
text_token text, kind text_token content, kind
elsif text.is_a? ::Symbol elsif content.is_a? ::Symbol
block_token text, kind block_token content, kind
else else
raise 'Unknown token text type: %p' % text raise 'Unknown token content type: %p' % [content]
end end
@out << out if @out append_encoded_token_to_output encoded_token
end end
def append_encoded_token_to_output encoded_token
@out << encoded_token if encoded_token && defined?(@out) && @out
end
# Called for each text token ([text, kind]), where text is a String.
def text_token text, kind def text_token text, kind
end end
# Called for each block (non-text) token ([action, kind]),
# where +action+ is a Symbol.
#
# Calls open_token, close_token, begin_line, and end_line according to
# the value of +action+.
def block_token action, kind def block_token action, kind
case action case action
when :open when :open
open_token kind open_token kind
when :close when :close
close_token kind close_token kind
when :begin_line
begin_line kind
when :end_line
end_line kind
else else
raise 'unknown block action: %p' % action raise 'unknown block action: %p' % action
end end
end end
# Called for each block token at the start of the block ([:open, kind]).
def open_token kind
end
# Called for each block token end of the block ([:close, kind]).
def close_token kind
end
# Called for each line token block at the start of the line ([:begin_line, kind]).
def begin_line kind
end
# Called for each line token block at the end of the line ([:end_line, kind]).
def end_line kind
end
# Called with merged options after encoding starts. # Called with merged options after encoding starts.
# The return value is the result of encoding, typically @out. # The return value is the result of encoding, typically @out.
...@@ -167,8 +195,16 @@ module CodeRay ...@@ -167,8 +195,16 @@ module CodeRay
# #
# The already created +tokens+ object must be used; it can be a # The already created +tokens+ object must be used; it can be a
# TokenStream or a Tokens object. # TokenStream or a Tokens object.
def compile tokens, options if RUBY_VERSION >= '1.9'
tokens.each(&self) def compile tokens, options
for text, kind in tokens
token text, kind
end
end
else
def compile tokens, options
tokens.each(&self)
end
end end
end end
......
module CodeRay module CodeRay
module Encoders module Encoders
map :stats => :statistic, map \
:loc => :lines_of_code,
:plain => :text, :plain => :text,
:stats => :statistic,
:terminal => :term,
:tex => :latex :tex => :latex
end end
......
($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
  module Encoders
    load :token_class_filter

    # A TokenClassFilter preconfigured to exclude the :comment token kind.
    # Registered under :comment_filter.
    class CommentFilter < TokenClassFilter
      register_for :comment_filter

      # The parent's defaults, with :exclude set to the :comment kind.
      DEFAULT_OPTIONS = superclass::DEFAULT_OPTIONS.merge(:exclude => [:comment])
    end
  end
end
# When this file is executed directly: turn on warnings, append the parent
# directory to the load path and evaluate the code stored after __END__.
# The file name and a starting line number are passed to eval; the +4
# offset presumably points at the first DATA line - confirm if lines are
# added between the eval call and __END__.
if $0 == __FILE__
$VERBOSE = true
$: << File.join(File.dirname(__FILE__), '..')
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
# Unit test for the :comment_filter encoder.
require 'test/unit'
class CommentFilterTest < Test::Unit::TestCase
# Scans a small Ruby snippet and expects the filtered text to no longer
# contain the "a minimal Ruby program" comment line, while the first line
# and the puts statement are kept.
def test_filtering_comments
tokens = CodeRay.scan <<-RUBY, :ruby
#!/usr/bin/env ruby
# a minimal Ruby program
puts "Hello world!"
RUBY
assert_equal <<-RUBY_FILTERED, tokens.comment_filter.text
#!/usr/bin/env ruby
puts "Hello world!"
RUBY_FILTERED
end
end
\ No newline at end of file
...@@ -35,6 +35,14 @@ module Encoders ...@@ -35,6 +35,14 @@ module Encoders
">" ">"
end end
def begin_line kind
"#{kind}["
end
def end_line kind
"]"
end
end end
end end
......
...@@ -9,10 +9,9 @@ module Encoders ...@@ -9,10 +9,9 @@ module Encoders
register_for :div register_for :div
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :style, :css => :style,
:wrap => :div, :wrap => :div
})
end end
......
($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
  module Encoders
    # Base class for token filters, registered under :filter.
    #
    # A filter is an encoder whose output is again a token collection.
    # Each token is kept or dropped according to include_text_token? and
    # include_block_token?; this base class keeps every token, subclasses
    # override the predicates.
    class Filter < Encoder
      register_for :filter

      protected

      # Prepares an empty token collection as the output.
      #
      # The constant is fully qualified on purpose: inside this module a
      # bare +Tokens+ resolves to the Encoders::Tokens encoder as soon as
      # that encoder has been loaded, not to the CodeRay::Tokens collection.
      def setup options
        @out = CodeRay::Tokens.new
      end

      # Returns the [text, kind] pair if the token is to be kept, nil otherwise.
      def text_token text, kind
        [text, kind] if include_text_token? text, kind
      end

      # Whether a text token is kept. Always true here.
      def include_text_token? text, kind
        true
      end

      # Returns the [action, kind] pair if the block token is to be kept,
      # nil otherwise.
      def block_token action, kind
        [action, kind] if include_block_token? action, kind
      end

      # Whether a block token is kept. Always true here.
      def include_block_token? action, kind
        true
      end
    end
  end
end
# When this file is executed directly: turn on warnings, append the parent
# directory to the load path and evaluate the code stored after __END__.
# The file name and a starting line number are passed to eval; the +4
# offset presumably points at the first DATA line - confirm if lines are
# added between the eval call and __END__.
if $0 == __FILE__
$VERBOSE = true
$: << File.join(File.dirname(__FILE__), '..')
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
# Unit tests for the :filter encoder.
require 'test/unit'
class FilterTest < Test::Unit::TestCase
# The Filter class is an Encoder subclass and can be created through
# CodeRay.encoder without raising.
def test_creation
assert CodeRay::Encoders::Filter < CodeRay::Encoders::Encoder
filter = nil
assert_nothing_raised do
filter = CodeRay.encoder :filter
end
assert_kind_of CodeRay::Encoders::Encoder, filter
end
# Filtering ten plain text tokens yields an equal token collection, both
# through the encoder and through tokens.filter.
def test_filtering_text_tokens
tokens = CodeRay::Tokens.new
10.times do |i|
tokens << [i.to_s, :index]
end
assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens)
assert_equal tokens, tokens.filter
end
# Filtering ten open/content/close groups yields an equal token collection,
# both through the encoder and through tokens.filter.
def test_filtering_block_tokens
tokens = CodeRay::Tokens.new
10.times do |i|
tokens << [:open, :index]
tokens << [i.to_s, :content]
tokens << [:close, :index]
end
assert_equal tokens, CodeRay::Encoders::Filter.new.encode_tokens(tokens)
assert_equal tokens, tokens.filter
end
end
require "set" require 'set'
module CodeRay module CodeRay
module Encoders module Encoders
...@@ -25,10 +25,6 @@ module Encoders ...@@ -25,10 +25,6 @@ module Encoders
# #
# == Options # == Options
# #
# === :escape
# Escape html entities
# Default: true
#
# === :tab_width # === :tab_width
# Convert \t characters to +n+ spaces (a number.) # Convert \t characters to +n+ spaces (a number.)
# Default: 8 # Default: 8
...@@ -45,6 +41,12 @@ module Encoders ...@@ -45,6 +41,12 @@ module Encoders
# #
# Default: nil # Default: nil
# #
# === :title
#
# The title of the HTML page (works only when :wrap is set to :page.)
#
# Default: 'CodeRay output'
#
# === :line_numbers # === :line_numbers
# Include line numbers in :table, :inline, :list or nil (no line numbers) # Include line numbers in :table, :inline, :list or nil (no line numbers)
# #
...@@ -60,6 +62,16 @@ module Encoders ...@@ -60,6 +62,16 @@ module Encoders
# #
# Default: 10 # Default: 10
# #
# === :highlight_lines
#
# Highlights certain line numbers.
# Can be any Enumerable, typically just an Array or Range, of numbers.
#
# Bolding is deactivated when :highlight_lines is set. It only makes sense
# in combination with :line_numbers.
#
# Default: nil
#
# === :hint # === :hint
# Include some information into the output using the title attribute. # Include some information into the output using the title attribute.
# Can be :info (show token type on mouse-over), :info_long (with full path) # Can be :info (show token type on mouse-over), :info_long (with full path)
...@@ -74,19 +86,18 @@ module Encoders ...@@ -74,19 +86,18 @@ module Encoders
FILE_EXTENSION = 'html' FILE_EXTENSION = 'html'
DEFAULT_OPTIONS = { DEFAULT_OPTIONS = {
:escape => true,
:tab_width => 8, :tab_width => 8,
:level => :xhtml,
:css => :class, :css => :class,
:style => :cycnus, :style => :cycnus,
:wrap => nil, :wrap => nil,
:title => 'CodeRay output',
:line_numbers => nil, :line_numbers => nil,
:line_number_start => 1, :line_number_start => 1,
:bold_every => 10, :bold_every => 10,
:highlight_lines => nil,
:hint => false, :hint => false,
} }
...@@ -141,7 +152,7 @@ module Encoders ...@@ -141,7 +152,7 @@ module Encoders
when :debug when :debug
classes.inspect classes.inspect
end end
" title=\"#{title}\"" title ? " title=\"#{title}\"" : ''
end end
def setup options def setup options
...@@ -150,7 +161,6 @@ module Encoders ...@@ -150,7 +161,6 @@ module Encoders
@HTML_ESCAPE = HTML_ESCAPE.dup @HTML_ESCAPE = HTML_ESCAPE.dup
@HTML_ESCAPE["\t"] = ' ' * options[:tab_width] @HTML_ESCAPE["\t"] = ' ' * options[:tab_width]
@escape = options[:escape]
@opened = [nil] @opened = [nil]
@css = CSS.new options[:style] @css = CSS.new options[:style]
...@@ -164,7 +174,7 @@ module Encoders ...@@ -164,7 +174,7 @@ module Encoders
when :class when :class
@css_style = Hash.new do |h, k| @css_style = Hash.new do |h, k|
c = Tokens::ClassOfKind[k.first] c = CodeRay::Tokens::ClassOfKind[k.first]
if c == :NO_HIGHLIGHT and not hint if c == :NO_HIGHLIGHT and not hint
h[k.dup] = false h[k.dup] = false
else else
...@@ -222,43 +232,70 @@ module Encoders ...@@ -222,43 +232,70 @@ module Encoders
@out.css = @css @out.css = @css
@out.numerize! options[:line_numbers], options @out.numerize! options[:line_numbers], options
@out.wrap! options[:wrap] @out.wrap! options[:wrap]
@out.apply_title! options[:title]
super super
end end
def token text, type def token text, type = :plain
if text.is_a? ::String case text
if @escape && (text =~ /#{HTML_ESCAPE_PATTERN}/o)
when nil
# raise 'Token with nil as text was given: %p' % [[text, type]]
when String
if text =~ /#{HTML_ESCAPE_PATTERN}/o
text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] } text = text.gsub(/#{HTML_ESCAPE_PATTERN}/o) { |m| @HTML_ESCAPE[m] }
end end
@opened[0] = type @opened[0] = type
if style = @css_style[@opened] if text != "\n" && style = @css_style[@opened]
@out << style << text << '</span>' @out << style << text << '</span>'
else else
@out << text @out << text
end end
else
case text
when :open # token groups, eg. strings
@opened[0] = type when :open
@out << (@css_style[@opened] || '<span>') @opened[0] = type
@opened << type @out << (@css_style[@opened] || '<span>')
when :close @opened << type
if @opened.empty? when :close
# nothing to close if @opened.empty?
else # nothing to close
if $DEBUG and (@opened.size == 1 or @opened.last != type) else
raise 'Malformed token stream: Trying to close a token (%p) \ if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type)
that is not open. Open are: %p.' % [type, @opened[1..-1]] raise 'Malformed token stream: Trying to close a token (%p) \
end that is not open. Open are: %p.' % [type, @opened[1..-1]]
@out << '</span>'
@opened.pop
end end
when nil @out << '</span>'
raise 'Token with nil as text was given: %p' % [[text, type]] @opened.pop
end
# whole lines to be highlighted, eg. a deleted line in a diff
when :begin_line
@opened[0] = type
if style = @css_style[@opened]
@out << style.sub('<span', '<div')
else else
raise 'unknown token kind: %p' % text @out << '<div>'
end end
@opened << type
when :end_line
if @opened.empty?
# nothing to close
else
if $CODERAY_DEBUG and (@opened.size == 1 or @opened.last != type)
raise 'Malformed token stream: Trying to close a line (%p) \
that is not open. Open are: %p.' % [type, @opened[1..-1]]
end
@out << '</div>'
@opened.pop
end
else
raise 'unknown token kind: %p' % [text]
end end
end end
......
...@@ -27,16 +27,19 @@ module Encoders ...@@ -27,16 +27,19 @@ module Encoders
1.upto(styles.size) do |offset| 1.upto(styles.size) do |offset|
break if style = cl[styles[offset .. -1]] break if style = cl[styles[offset .. -1]]
end end
raise 'Style not found: %p' % [styles] if $DEBUG and style.empty? # warn 'Style not found: %p' % [styles] if style.empty?
return style return style
end end
private private
CSS_CLASS_PATTERN = / CSS_CLASS_PATTERN = /
( (?: # $1 = classes ( # $1 = selectors
\s* \. [-\w]+ (?:
)+ ) (?: \s* \. [-\w]+ )+
\s* ,?
)+
)
\s* \{ \s* \s* \{ \s*
( [^\}]+ )? # $2 = style ( [^\}]+ )? # $2 = style
\s* \} \s* \s* \} \s*
...@@ -44,12 +47,14 @@ module Encoders ...@@ -44,12 +47,14 @@ module Encoders
( . ) # $3 = error ( . ) # $3 = error
/mx /mx
def parse stylesheet def parse stylesheet
stylesheet.scan CSS_CLASS_PATTERN do |classes, style, error| stylesheet.scan CSS_CLASS_PATTERN do |selectors, style, error|
raise "CSS parse error: '#{error.inspect}' not recognized" if error raise "CSS parse error: '#{error.inspect}' not recognized" if error
styles = classes.scan(/[-\w]+/) for selector in selectors.split(',')
cl = styles.pop classes = selector.scan(/[-\w]+/)
@classes[cl] ||= Hash.new cl = classes.pop
@classes[cl][styles] = style.to_s.strip @classes[cl] ||= Hash.new
@classes[cl][classes] = style.to_s.strip.delete(' ').chomp(';')
end
end end
end end
......
...@@ -32,9 +32,19 @@ module Encoders ...@@ -32,9 +32,19 @@ module Encoders
#end #end
bold_every = options[:bold_every] bold_every = options[:bold_every]
highlight_lines = options[:highlight_lines]
bolding = bolding =
if bold_every == false if bold_every == false && highlight_lines == nil
proc { |line| line.to_s } proc { |line| line.to_s }
elsif highlight_lines.is_a? Enumerable
highlight_lines = highlight_lines.to_set
proc do |line|
if highlight_lines.include? line
"<strong class=\"highlighted\">#{line}</strong>" # highlighted line numbers in bold
else
line.to_s
end
end
elsif bold_every.is_a? Integer elsif bold_every.is_a? Integer
raise ArgumentError, ":bolding can't be 0." if bold_every == 0 raise ArgumentError, ":bolding can't be 0." if bold_every == 0
proc do |line| proc do |line|
...@@ -51,12 +61,12 @@ module Encoders ...@@ -51,12 +61,12 @@ module Encoders
case mode case mode
when :inline when :inline
max_width = (start + line_count).to_s.size max_width = (start + line_count).to_s.size
line = start line_number = start
gsub!(/^/) do gsub!(/^/) do
line_number = bolding.call line line_number_text = bolding.call line_number
indent = ' ' * (max_width - line.to_s.size) indent = ' ' * (max_width - line_number.to_s.size) # TODO: Optimize (10^x)
res = "<span class=\"no\">#{indent}#{line_number}</span> " res = "<span class=\"no\">#{indent}#{line_number_text}</span> "
line += 1 line_number += 1
res res
end end
...@@ -65,12 +75,12 @@ module Encoders ...@@ -65,12 +75,12 @@ module Encoders
# Because even monospace fonts seem to have different heights when bold, # Because even monospace fonts seem to have different heights when bold,
# I make the newline bold, both in the code and the line numbers. # I make the newline bold, both in the code and the line numbers.
# FIXME Still not working perfect for Mr. Internet Exploder # FIXME Still not working perfect for Mr. Internet Exploder
# FIXME Firefox struggles with very long codes (> 200 lines)
line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n") line_numbers = (start ... start + line_count).to_a.map(&bolding).join("\n")
line_numbers << "\n" # also for Mr. MS Internet Exploder :-/ line_numbers << "\n" # also for Mr. MS Internet Exploder :-/
line_numbers.gsub!(/\n/) { "<tt>\n</tt>" } line_numbers.gsub!(/\n/) { "<tt>\n</tt>" }
line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers) line_numbers_table_tpl = TABLE.apply('LINE_NUMBERS', line_numbers)
gsub!(/<\/div>\n/) { '</div>' }
gsub!(/\n/) { "<tt>\n</tt>" } gsub!(/\n/) { "<tt>\n</tt>" }
wrap_in! line_numbers_table_tpl wrap_in! line_numbers_table_tpl
@wrapped_in = :div @wrapped_in = :div
...@@ -90,8 +100,9 @@ module Encoders ...@@ -90,8 +100,9 @@ module Encoders
end end
close = '</span>' * opened_tags.size close = '</span>' * opened_tags.size
"<li>#{open}#{line}#{close}</li>" "<li>#{open}#{line}#{close}</li>\n"
end end
chomp!("\n")
wrap_in! LIST wrap_in! LIST
@wrapped_in = :div @wrapped_in = :div
......
...@@ -86,6 +86,11 @@ module Encoders ...@@ -86,6 +86,11 @@ module Encoders
Template.wrap! self, template, 'CONTENT' Template.wrap! self, template, 'CONTENT'
self self
end end
def apply_title! title
self.sub!(/(<title>)(<\/title>)/) { $1 + title + $2 }
self
end
def wrap! element, *args def wrap! element, *args
return self if not element or element == wrapped_in return self if not element or element == wrapped_in
...@@ -100,6 +105,10 @@ module Encoders ...@@ -100,6 +105,10 @@ module Encoders
wrap! :div if wrapped_in? nil wrap! :div if wrapped_in? nil
raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div raise "Can't wrap %p in %p" % [wrapped_in, element] unless wrapped_in? :div
wrap_in! Output.page_template_for_css(@css) wrap_in! Output.page_template_for_css(@css)
if args.first.is_a?(Hash) && title = args.first[:title]
apply_title! title
end
self
when nil when nil
return self return self
else else
...@@ -166,7 +175,9 @@ module Encoders ...@@ -166,7 +175,9 @@ module Encoders
# title="double click to expand" # title="double click to expand"
LIST = <<-`LIST` LIST = <<-`LIST`
<ol class="CodeRay"><%CONTENT%></ol> <ol class="CodeRay">
<%CONTENT%>
</ol>
LIST LIST
PAGE = <<-`PAGE` PAGE = <<-`PAGE`
...@@ -175,7 +186,7 @@ module Encoders ...@@ -175,7 +186,7 @@ module Encoders
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de"> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="de">
<head> <head>
<meta http-equiv="content-type" content="text/html; charset=utf-8" /> <meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>CodeRay HTML Encoder Example</title> <title></title>
<style type="text/css"> <style type="text/css">
<%CSS%> <%CSS%>
</style> </style>
......
($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
  module Encoders

    # = JSON Encoder
    #
    # Describes every token as a Hash and renders the collected output
    # with +to_json+ once encoding is finished.
    class JSON < Encoder

      register_for :json
      FILE_EXTENSION = 'json'

      protected

      # Loads the json library and starts with an empty Array as output.
      def setup options
        load_json_library
        @out = []
      end

      # Hash describing a text token (a piece of text plus its kind).
      def text_token text, kind
        {
          :type => 'text',
          :text => text,
          :kind => kind
        }
      end

      # Hash describing a block token (an action such as open/close
      # plus the kind of the group it applies to).
      def block_token action, kind
        {
          :type => 'block',
          :action => action,
          :kind => kind
        }
      end

      # Serializes everything collected in @out as a JSON string.
      def finish options
        @out.to_json
      end

      # Requires 'json'; when that fails with a LoadError, loads RubyGems
      # and tries once more.
      def load_json_library
        require 'json'
      rescue LoadError
        require 'rubygems'
        require 'json'
      end

    end

  end
end
# Self-test runner: only active when this file is executed directly
# instead of being required.
if $0 == __FILE__
# Turn on Ruby warnings for the test run.
$VERBOSE = true
# Make the parent directory of this file available on the load path.
$: << File.join(File.dirname(__FILE__), '..')
# Evaluate the code stored after __END__; the file name and line offset are
# passed so that error messages refer to this file.
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
require 'test/unit'
$:.delete '.'
require 'rubygems' if RUBY_VERSION < '1.9'
class JSONEncoderTest < Test::Unit::TestCase

  # Scans a one-line Ruby program, encodes it as JSON and compares the
  # decoded result with the expected list of token hashes.
  def test_json_output
    require 'json'

    tokens = CodeRay.scan <<-RUBY, :ruby
puts "Hello world!"
    RUBY

    expected_tokens = [
      {"type"=>"text", "text"=>"puts", "kind"=>"ident"},
      {"type"=>"text", "text"=>" ", "kind"=>"space"},
      {"type"=>"block", "action"=>"open", "kind"=>"string"},
      {"type"=>"text", "text"=>"\"", "kind"=>"delimiter"},
      {"type"=>"text", "text"=>"Hello world!", "kind"=>"content"},
      {"type"=>"text", "text"=>"\"", "kind"=>"delimiter"},
      {"type"=>"block", "action"=>"close", "kind"=>"string"},
      {"type"=>"text", "text"=>"\n", "kind"=>"space"}
    ]

    assert_equal expected_tokens, JSON.load(tokens.json)
  end

end
\ No newline at end of file
($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
  module Encoders

    # Counts the LoC (Lines of Code). Returns an Integer >= 0.
    #
    # Alias: :loc
    #
    # Everything that is not comment, markup, doctype/shebang, or an empty line,
    # is considered to be code.
    #
    # For example,
    # * HTML files not containing JavaScript have 0 LoC
    # * in a Java class without comments, LoC is the number of non-empty lines
    #
    # A Scanner class should define the token kinds that are not code in the
    # KINDS_NOT_LOC constant, which defaults to [:comment, :doctype].
    class LinesOfCode < Encoder

      register_for :lines_of_code

      # Matches a line that contains at least one non-whitespace character.
      NON_EMPTY_LINE = /^\s*\S.*$/

      # Removes all tokens whose kind is listed in KINDS_NOT_LOC and counts
      # the non-empty lines in the remaining text.
      #
      # The list is taken from the class of the scanner attached to +tokens+.
      # Without a scanner, the list of CodeRay::Scanners::Scanner is used and
      # a warning is printed in verbose mode.
      def compile tokens, options
        if scanner = tokens.scanner
          kinds_not_loc = scanner.class::KINDS_NOT_LOC
        else
          # Kernel#warn expects message strings only; an exception class
          # must not be passed as the first argument.
          warn 'Tokens have no scanner.' if $VERBOSE
          kinds_not_loc = CodeRay::Scanners::Scanner::KINDS_NOT_LOC
        end
        code = tokens.token_class_filter :exclude => kinds_not_loc
        @loc = code.text.scan(NON_EMPTY_LINE).size
      end

      # Returns the line count computed by #compile.
      def finish options
        @loc
      end

    end

  end
end
# Self-test runner: only active when this file is executed directly
# instead of being required.
if $0 == __FILE__
# Turn on Ruby warnings for the test run.
$VERBOSE = true
# Make the parent directory of this file available on the load path.
$: << File.join(File.dirname(__FILE__), '..')
# Evaluate the code stored after __END__; the file name and line offset are
# passed so that error messages refer to this file.
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
require 'test/unit'
class LinesOfCodeTest < Test::Unit::TestCase

  # The encoder is a subclass of Encoder and can be looked up under
  # both of its ids.
  def test_creation
    assert CodeRay::Encoders::LinesOfCode < CodeRay::Encoders::Encoder
    [:loc, :lines_of_code].each do |encoder_id|
      filter = nil
      assert_nothing_raised { filter = CodeRay.encoder encoder_id }
      assert_kind_of CodeRay::Encoders::LinesOfCode, filter
    end
  end

  # Shebang and comment lines are not counted; only the puts line is code.
  def test_lines_of_code
    tokens = CodeRay.scan <<-RUBY, :ruby
#!/usr/bin/env ruby
# a minimal Ruby program
puts "Hello world!"
    RUBY
    expected = 1
    assert_equal expected, CodeRay::Encoders::LinesOfCode.new.encode_tokens(tokens)
    assert_equal expected, tokens.lines_of_code
    assert_equal expected, tokens.loc
  end

  # Hand-built tokens without a scanner: two of the three lines are
  # expected to be counted.
  def test_filtering_block_tokens
    tokens = CodeRay::Tokens.new
    [:world, :space, :comment].each do |kind|
      tokens << ["Hello\n", kind]
    end
    expected = 2
    assert_equal expected, CodeRay::Encoders::LinesOfCode.new.encode_tokens(tokens)
    assert_equal expected, tokens.lines_of_code
    assert_equal expected, tokens.loc
  end

end
\ No newline at end of file
...@@ -9,11 +9,10 @@ module Encoders ...@@ -9,11 +9,10 @@ module Encoders
register_for :page register_for :page
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :class, :css => :class,
:wrap => :page, :wrap => :page,
:line_numbers => :table :line_numbers => :table
})
end end
......
...@@ -9,10 +9,9 @@ module Encoders ...@@ -9,10 +9,9 @@ module Encoders
register_for :span register_for :span
DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge({ DEFAULT_OPTIONS = HTML::DEFAULT_OPTIONS.merge \
:css => :style, :css => :style,
:wrap => :span, :wrap => :span
})
end end
......
# encoders/term.rb
# By Rob Aldred (http://robaldred.co.uk)
# Based on idea by Nathan Weizenbaum (http://nex-3.com)
# MIT License (http://www.opensource.org/licenses/mit-license.php)
#
# A CodeRay encoder that outputs code highlighted for a color terminal.
# Check out http://robaldred.co.uk
module CodeRay
  module Encoders

    # Encodes tokens as text decorated with ANSI color escape sequences.
    class Term < Encoder
      register_for :term

      # Maps token kinds to color codes. A value is either a single code
      # String, an Array of codes (all of them are emitted), or a Hash used
      # for token groups: its :self entry colors the group itself, the other
      # entries color the token kinds inside the group.
      TOKEN_COLORS = {
        :annotation => '35',
        :attribute_name => '33',
        :attribute_name_fat => '33',
        :attribute_value => '31',
        :attribute_value_fat => '31',
        :bin => '1;35',
        :char => {:self => '36', :delimiter => '34'},
        :class => '1;35',
        :class_variable => '36',
        :color => '32',
        :comment => '37',
        :complex => '34',
        :constant => ['34', '4'],
        :decoration => '35',
        :definition => '1;32',
        :directive => ['32', '4'],
        :doc => '46',
        :doctype => '1;30',
        :doc_string => ['31', '4'],
        :entity => '33',
        :error => ['1;33', '41'],
        :exception => '1;31',
        :float => '1;35',
        :function => '1;34',
        :global_variable => '42',
        :hex => '1;36',
        :important => '1;31',
        :include => '33',
        :integer => '1;34',
        :interpreted => '1;35',
        :key => '35',
        :label => '1;4',
        :local_variable => '33',
        :oct => '1;35',
        :operator_name => '1;29',
        :pre_constant => '1;36',
        :pre_type => '1;30',
        :predefined => ['4', '1;34'],
        :preprocessor => '36',
        :pseudo_class => '34',
        :regexp => {
          :content => '31',
          :delimiter => '1;29',
          :modifier => '35',
          :function => '1;29'
        },
        :reserved => '1;31',
        :shell => {
          :self => '42',
          :content => '1;29',
          :delimiter => '37',
        },
        :string => {
          :self => '32',
          :modifier => '1;32',
          :escape => '1;36',
          :delimiter => '1;32',
        },
        :symbol => '1;32',
        :tag => '34',
        :tag_fat => '1;34',
        :tag_special => ['34', '4'],
        :type => '1;34',
        :value => '36',
        :variable => '34',
        :insert => '42',
        :delete => '41',
        :change => '44',
        :head => '45',
      }
      TOKEN_COLORS[:keyword] = TOKEN_COLORS[:reserved]
      TOKEN_COLORS[:method] = TOKEN_COLORS[:function]
      TOKEN_COLORS[:imaginary] = TOKEN_COLORS[:complex]
      # NOTE(review): TOKEN_COLORS has no top-level :delimiter entry, so all
      # of these kinds end up mapped to nil (no color) - confirm this is intended.
      TOKEN_COLORS[:open] = TOKEN_COLORS[:close] = TOKEN_COLORS[:nesting_delimiter] = TOKEN_COLORS[:escape] = TOKEN_COLORS[:delimiter]

      protected

      # Starts with empty output, no open token group and no group colors.
      def setup(options)
        @out = ''
        @opened = [nil]
        @subcolors = nil
      end

      def finish(options)
        super
      end

      # Appends one token to the output.
      #
      # +text+ is either a String (a text token of kind +type+), one of the
      # symbols :open / :close (start and end of a token group),
      # :begin_line / :end_line (ignored by this encoder), or nil (ignored).
      # Any other value raises a RuntimeError.
      def token text, type = :plain
        case text

        when nil
          # raise 'Token with nil as text was given: %p' % [[text, type]]

        when String
          color = (@subcolors || TOKEN_COLORS)[type]
          # A Hash is a group color table; only its :self entry (if any)
          # applies to a plain text token of that kind.
          color = color[:self] if Hash === color
          if color
            # Switch the color off before each newline and on again after it.
            @out << col(color) + text.gsub("\n", col(0) + "\n" + col(color)) + col(0)
            # Restore the color of the surrounding group after the reset.
            @out << col(@subcolors[:self]) if @subcolors && @subcolors[:self]
          else
            # No usable color: emit the text unchanged instead of dropping it.
            @out << text
          end

        # token groups, eg. strings
        when :open
          @opened[0] = type
          if color = TOKEN_COLORS[type]
            if Hash === color
              @subcolors = color
              @out << col(color[:self]) if color[:self]
            else
              @subcolors = {}
              @out << col(color)
            end
          end
          @opened << type
        when :close
          if @opened.empty?
            # nothing to close
          else
            # NOTE(review): the color is only reset when the group table has
            # a :self entry; a group opened with a plain color is not reset
            # here - confirm whether that is intended.
            @out << col(0) if (@subcolors || {})[:self]
            @subcolors = nil
            @opened.pop
          end

        # whole lines to be highlighted, eg. added/modified/deleted lines in a diff
        when :begin_line

        when :end_line

        else
          raise 'unknown token kind: %p' % [text]
        end
      end

      private

      # Builds the escape sequence(s) for +color+, which may be a single
      # code or an Array of codes; col(0) is used above to switch colors off.
      def col(color)
        Array(color).map { |c| "\e[#{c}m" }.join
      end

    end

  end
end
\ No newline at end of file
...@@ -14,16 +14,16 @@ module Encoders ...@@ -14,16 +14,16 @@ module Encoders
protected protected
def setup options def setup options
@out = '' super
@sep = options[:separator] @sep = options[:separator]
end end
def token text, kind def text_token text, kind
@out << text + @sep if text.is_a? ::String text + @sep
end end
def finish options def finish options
@out.chomp @sep super.chomp @sep
end end
end end
......
($:.unshift '../..'; require 'coderay') unless defined? CodeRay
module CodeRay
  module Encoders

    load :filter

    # A Filter that selects text tokens by their kind, using the
    # :include and :exclude options. Each option is either :all or
    # a kind / list of kinds.
    class TokenClassFilter < Filter

      include Streamable
      register_for :token_class_filter

      DEFAULT_OPTIONS = {
        :exclude => [],
        :include => :all
      }

      protected

      # Stores both options; anything other than :all is converted
      # to an Array with Kernel#Array.
      def setup options
        super
        @exclude = options[:exclude]
        @include = options[:include]
        @exclude = Array(@exclude) unless @exclude == :all
        @include = Array(@include) unless @include == :all
      end

      # A text token is kept when its kind is included (or everything is)
      # and it is not excluded (and not everything is).
      def include_text_token? text, kind
        (@include == :all || @include.include?(kind)) &&
          @exclude != :all &&
          !@exclude.include?(kind)
      end

    end

  end
end
# Self-test runner: only active when this file is executed directly
# instead of being required.
if $0 == __FILE__
# Turn on Ruby warnings for the test run.
$VERBOSE = true
# Make the parent directory of this file available on the load path.
$: << File.join(File.dirname(__FILE__), '..')
# Evaluate the code stored after __END__; the file name and line offset are
# passed so that error messages refer to this file.
eval DATA.read, nil, $0, __LINE__ + 4
end
__END__
require 'test/unit'
class TokenClassFilterTest < Test::Unit::TestCase

  # The filter is both an Encoder and a Filter and is registered
  # under :token_class_filter.
  def test_creation
    filter_class = CodeRay::Encoders::TokenClassFilter
    assert filter_class < CodeRay::Encoders::Encoder
    assert filter_class < CodeRay::Encoders::Filter
    filter = nil
    assert_nothing_raised { filter = CodeRay.encoder :token_class_filter }
    assert_instance_of filter_class, filter
  end

  # Ten :index tokens separated by nine :space tokens.
  def test_filtering_text_tokens
    tokens = CodeRay::Tokens.new
    1.upto(10) do |i|
      tokens << [i.to_s, :index]
      tokens << [' ', :space] if i < 10
    end
    assert_equal 10, CodeRay::Encoders::TokenClassFilter.new.encode_tokens(tokens, :exclude => :space).size
    assert_equal 10, tokens.token_class_filter(:exclude => :space).size
    assert_equal 9, CodeRay::Encoders::TokenClassFilter.new.encode_tokens(tokens, :include => :space).size
    assert_equal 9, tokens.token_class_filter(:include => :space).size
    assert_equal 0, CodeRay::Encoders::TokenClassFilter.new.encode_tokens(tokens, :exclude => :all).size
    assert_equal 0, tokens.token_class_filter(:exclude => :all).size
  end

  # Ten :index groups, each wrapping a single :content token.
  def test_filtering_block_tokens
    tokens = CodeRay::Tokens.new
    (0...10).each do |i|
      tokens << [:open, :index]
      tokens << [i.to_s, :content]
      tokens << [:close, :index]
    end
    assert_equal 20, CodeRay::Encoders::TokenClassFilter.new.encode_tokens(tokens, :include => :blubb).size
    assert_equal 20, tokens.token_class_filter(:include => :blubb).size
    assert_equal 30, CodeRay::Encoders::TokenClassFilter.new.encode_tokens(tokens, :exclude => :index).size
    assert_equal 30, tokens.token_class_filter(:exclude => :index).size
  end

end
...@@ -29,6 +29,7 @@ module Encoders ...@@ -29,6 +29,7 @@ module Encoders
end end
def finish options def finish options
@out = ''
@doc.write @out, options[:pretty], options[:transitive], true @doc.write @out, options[:pretty], options[:transitive], true
@out @out
end end
......
module CodeRay

  # A little hack to enable CodeRay highlighting in RedCloth.
  #
  # Usage:
  #   require 'coderay'
  #   require 'coderay/for_redcloth'
  #   RedCloth.new('@[ruby]puts "Hello, World!"@').to_html
  #
  # Make sure you have RedCloth 4.0.3 activated, for example by calling
  #   require 'rubygems'
  # before RedCloth is loaded and before calling CodeRay::ForRedCloth.install.
  module ForRedCloth

    # Returns true if the dotted version string +actual+ is at least
    # +required+. +actual+ defaults to the version of the loaded RedCloth.
    #
    # The components are compared as integers, so "4.10.0" counts as newer
    # than "4.2.2" (a plain String comparison would get this wrong).
    def self.version_at_least? required, actual = RedCloth::VERSION.to_s
      to_numbers = proc { |version| version.to_s.split('.').map { |part| part.to_i } }
      (to_numbers.call(actual) <=> to_numbers.call(required)) >= 0
    end

    # Loads RedCloth, checks its version and patches
    # RedCloth::Formatters::HTML so that code spans and blocks are
    # highlighted by CodeRay.
    #
    # Raises a RuntimeError if RedCloth is older than 4.0.3 and RubyGems is
    # available; without RubyGems, it loads RubyGems and retries once more.
    # Prints a warning for versions older than 4.2.2.
    def self.install
      gem 'RedCloth', '>= 4.0.3' if defined? gem
      require 'redcloth'
      unless version_at_least? '4.0.3'
        if defined? gem
          raise 'CodeRay.for_redcloth needs RedCloth version 4.0.3 or later. ' +
            "You have #{RedCloth::VERSION}. Please gem install RedCloth."
        else
          # Forget that redcloth was loaded so it can be required again
          # through RubyGems.
          $".delete 'redcloth.rb'  # sorry, but it works
          require 'rubygems'
          return install  # retry
        end
      end
      unless version_at_least? '4.2.2'
        warn 'CodeRay.for_redcloth works best with RedCloth version 4.2.2 or later.'
      end
      RedCloth::TextileDoc.send :include, ForRedCloth::TextileDoc
      RedCloth::Formatters::HTML.module_eval do
        # Turns the four entities &amp; &quot; &gt; &lt; back into characters.
        def unescape(html)
          replacements = {
            '&amp;' => '&',
            '&quot;' => '"',
            '&gt;' => '>',
            '&lt;' => '<',
          }
          html.gsub(/&(?:amp|quot|[gl]t);/) { |entity| replacements[entity] }
        end
        # Remove the existing implementations before redefining them below.
        undef code, bc_open, bc_close, escape_pre
        def code(opts)  # :nodoc:
          opts[:block] = true
          if !opts[:lang] && CodeRay::ForRedCloth.version_at_least?('4.2.0')
            # simulating pre-4.2 behavior
            if opts[:text].sub!(/\A\[(\w+)\]/, '')
              if CodeRay::Scanners[$1].plugin_id == 'plaintext'
                # Not a known language: put the removed "[...]" prefix back.
                opts[:text] = $& + opts[:text]
              else
                opts[:lang] = $1
              end
            end
          end
          if opts[:lang] && !filter_coderay
            require 'coderay'
            @in_bc ||= nil
            # Inside a block (bc) a div is produced, otherwise a span.
            format = @in_bc ? :div : :span
            opts[:text] = unescape(opts[:text]) unless @in_bc
            highlighted_code = CodeRay.encode opts[:text], opts[:lang], format, :stream => true
            # Add the attributes computed by pba to the outermost tag.
            highlighted_code.sub!(/\A<(span|div)/) { |m| m + pba(@in_bc || opts) }
            highlighted_code
          else
            "<code#{pba(opts)}>#{opts[:text]}</code>"
          end
        end
        def bc_open(opts)  # :nodoc:
          opts[:block] = true
          # Remember the block options; #code and #escape_pre check @in_bc.
          @in_bc = opts
          opts[:lang] ? '' : "<pre#{pba(opts)}>"
        end
        def bc_close(opts)  # :nodoc:
          opts = @in_bc
          @in_bc = nil
          opts[:lang] ? '' : "</pre>\n"
        end
        # Leaves the text untouched inside a block (bc); otherwise it is
        # escaped with html_esc.
        def escape_pre(text)
          if @in_bc ||= nil
            text
          else
            html_esc(text, :html_escape_preformatted)
          end
        end
      end
    end

    module TextileDoc  # :nodoc:
      # When set to a true value, #code skips CodeRay highlighting.
      attr_accessor :filter_coderay
    end

  end

end
CodeRay::ForRedCloth.install
\ No newline at end of file
#!/usr/bin/env ruby
module CodeRay module CodeRay
# = FileType # = FileType
...@@ -33,12 +34,14 @@ module FileType ...@@ -33,12 +34,14 @@ module FileType
# That means you can get filetypes from files that don't exist. # That means you can get filetypes from files that don't exist.
def [] filename, read_shebang = false def [] filename, read_shebang = false
name = File.basename filename name = File.basename filename
ext = File.extname name ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot
ext.sub!(/^\./, '') # delete the leading dot ext2 = filename.to_s[/\.(.*)/, 1] # from first dot
type = type =
TypeFromExt[ext] || TypeFromExt[ext] ||
TypeFromExt[ext.downcase] || TypeFromExt[ext.downcase] ||
(TypeFromExt[ext2] if ext2) ||
(TypeFromExt[ext2.downcase] if ext2) ||
TypeFromName[name] || TypeFromName[name] ||
TypeFromName[name.downcase] TypeFromName[name.downcase]
type ||= shebang(filename) if read_shebang type ||= shebang(filename) if read_shebang
...@@ -49,8 +52,11 @@ module FileType ...@@ -49,8 +52,11 @@ module FileType
def shebang filename def shebang filename
begin begin
File.open filename, 'r' do |f| File.open filename, 'r' do |f|
first_line = f.gets if first_line = f.gets
first_line[TypeFromShebang] if type = first_line[TypeFromShebang]
type.to_sym
end
end
end end
rescue IOError rescue IOError
nil nil
...@@ -77,30 +83,46 @@ module FileType ...@@ -77,30 +83,46 @@ module FileType
end end
TypeFromExt = { TypeFromExt = {
'rb' => :ruby,
'rbw' => :ruby,
'rake' => :ruby,
'mab' => :ruby,
'cpp' => :c,
'c' => :c, 'c' => :c,
'css' => :css,
'diff' => :diff,
'dpr' => :delphi,
'groovy' => :groovy,
'gvy' => :groovy,
'h' => :c, 'h' => :c,
'java' => :java,
'js' => :javascript,
'xml' => :xml,
'htm' => :html, 'htm' => :html,
'html' => :html, 'html' => :html,
'html.erb' => :rhtml,
'java' => :java,
'js' => :java_script,
'json' => :json,
'mab' => :ruby,
'pas' => :delphi,
'patch' => :diff,
'php' => :php, 'php' => :php,
'php3' => :php, 'php3' => :php,
'php4' => :php, 'php4' => :php,
'php5' => :php, 'php5' => :php,
'xhtml' => :xhtml, 'py' => :python,
'py3' => :python,
'pyw' => :python,
'rake' => :ruby,
'raydebug' => :debug, 'raydebug' => :debug,
'rb' => :ruby,
'rbw' => :ruby,
'rhtml' => :rhtml, 'rhtml' => :rhtml,
'ss' => :scheme, 'rxml' => :ruby,
'sch' => :scheme, 'sch' => :scheme,
'sql' => :sql,
'ss' => :scheme,
'xhtml' => :xhtml,
'xml' => :xml,
'yaml' => :yaml, 'yaml' => :yaml,
'yml' => :yaml, 'yml' => :yaml,
} }
for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
TypeFromExt[cpp_alias] = :cpp
end
TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/ TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
...@@ -115,15 +137,16 @@ end ...@@ -115,15 +137,16 @@ end
if $0 == __FILE__ if $0 == __FILE__
$VERBOSE = true $VERBOSE = true
eval DATA.read, nil, $0, __LINE__+4 eval DATA.read, nil, $0, __LINE__ + 4
end end
__END__ __END__
require 'test/unit' require 'test/unit'
class TC_FileType < Test::Unit::TestCase class FileTypeTests < Test::Unit::TestCase
include CodeRay
def test_fetch def test_fetch
assert_raise FileType::UnknownFileType do assert_raise FileType::UnknownFileType do
FileType.fetch '' FileType.fetch ''
...@@ -150,6 +173,8 @@ class TC_FileType < Test::Unit::TestCase ...@@ -150,6 +173,8 @@ class TC_FileType < Test::Unit::TestCase
def test_ruby def test_ruby
assert_equal :ruby, FileType['test.rb'] assert_equal :ruby, FileType['test.rb']
assert_equal :ruby, FileType['test.java.rb']
assert_equal :java, FileType['test.rb.java']
assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw'] assert_equal :ruby, FileType['C:\\Program Files\\x\\y\\c\\test.rbw']
assert_equal :ruby, FileType['/usr/bin/something/Rakefile'] assert_equal :ruby, FileType['/usr/bin/something/Rakefile']
assert_equal :ruby, FileType['~/myapp/gem/Rantfile'] assert_equal :ruby, FileType['~/myapp/gem/Rantfile']
...@@ -169,11 +194,23 @@ class TC_FileType < Test::Unit::TestCase ...@@ -169,11 +194,23 @@ class TC_FileType < Test::Unit::TestCase
assert_not_equal :c, FileType['~/projects/blabla/c'] assert_not_equal :c, FileType['~/projects/blabla/c']
end end
def test_cpp
assert_equal :cpp, FileType['test.c++']
assert_equal :cpp, FileType['test.cxx']
assert_equal :cpp, FileType['test.hh']
assert_equal :cpp, FileType['test.hpp']
assert_equal :cpp, FileType['test.cu']
assert_equal :cpp, FileType['test.C']
assert_not_equal :cpp, FileType['test.c']
assert_not_equal :cpp, FileType['test.h']
end
def test_html def test_html
assert_equal :html, FileType['test.htm'] assert_equal :html, FileType['test.htm']
assert_equal :xhtml, FileType['test.xhtml'] assert_equal :xhtml, FileType['test.xhtml']
assert_equal :xhtml, FileType['test.html.xhtml'] assert_equal :xhtml, FileType['test.html.xhtml']
assert_equal :rhtml, FileType['_form.rhtml'] assert_equal :rhtml, FileType['_form.rhtml']
assert_equal :rhtml, FileType['_form.html.erb']
end end
def test_yaml def test_yaml
...@@ -183,7 +220,16 @@ class TC_FileType < Test::Unit::TestCase ...@@ -183,7 +220,16 @@ class TC_FileType < Test::Unit::TestCase
assert_not_equal :yaml, FileType['YAML'] assert_not_equal :yaml, FileType['YAML']
end end
def test_shebang def test_pathname
require 'pathname'
pn = Pathname.new 'test.rb'
assert_equal :ruby, FileType[pn]
dir = Pathname.new '/etc/var/blubb'
assert_equal :ruby, FileType[dir + pn]
assert_equal :cpp, FileType[dir + 'test.cpp']
end
def test_no_shebang
dir = './test' dir = './test'
if File.directory? dir if File.directory? dir
Dir.chdir dir do Dir.chdir dir do
...@@ -191,5 +237,19 @@ class TC_FileType < Test::Unit::TestCase ...@@ -191,5 +237,19 @@ class TC_FileType < Test::Unit::TestCase
end end
end end
end end
def test_shebang_empty_file
require 'tmpdir'
tmpfile = File.join(Dir.tmpdir, 'bla')
File.open(tmpfile, 'w') { } # touch
assert_equal nil, FileType[tmpfile]
end
def test_shebang
require 'tmpdir'
tmpfile = File.join(Dir.tmpdir, 'bla')
File.open(tmpfile, 'w') { |f| f.puts '#!/usr/bin/env ruby' }
assert_equal :ruby, FileType[tmpfile, true]
end
end end
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# #
# A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.) # A simplified interface to the gzip library +zlib+ (from the Ruby Standard Library.)
# #
# Author: murphy (mail to murphy cYcnus de) # Author: murphy (mail to murphy rubychan de)
# #
# Version: 0.2 (2005.may.28) # Version: 0.2 (2005.may.28)
# #
......
...@@ -2,8 +2,6 @@ module CodeRay ...@@ -2,8 +2,6 @@ module CodeRay
# = PluginHost # = PluginHost
# #
# $Id: plugin.rb 220 2007-01-01 02:58:58Z murphy $
#
# A simple subclass plugin system. # A simple subclass plugin system.
# #
# Example: # Example:
...@@ -22,7 +20,7 @@ module CodeRay ...@@ -22,7 +20,7 @@ module CodeRay
# #
# Generators[:fancy] #-> FancyGenerator # Generators[:fancy] #-> FancyGenerator
# # or # # or
# require_plugin 'Generators/fancy' # CodeRay.require_plugin 'Generators/fancy'
module PluginHost module PluginHost
# Raised if Encoders::[] fails because: # Raised if Encoders::[] fails because:
...@@ -135,9 +133,13 @@ module PluginHost ...@@ -135,9 +133,13 @@ module PluginHost
# map :navy => :dark_blue # map :navy => :dark_blue
# default :gray # default :gray
# end # end
def default id def default id = nil
id = validate_id id if id
plugin_hash[nil] = id id = validate_id id
plugin_hash[nil] = id
else
plugin_hash[nil]
end
end end
# Every plugin must register itself for one or more # Every plugin must register itself for one or more
...@@ -174,7 +176,7 @@ module PluginHost ...@@ -174,7 +176,7 @@ module PluginHost
def inspect def inspect
map = plugin_hash.dup map = plugin_hash.dup
map.each do |id, plugin| map.each do |id, plugin|
map[id] = plugin.to_s[/(?>[\w_]+)$/] map[id] = plugin.to_s[/(?>\w+)$/]
end end
"#{name}[#{host_id}]#{map.inspect}" "#{name}[#{host_id}]#{map.inspect}"
end end
...@@ -212,7 +214,7 @@ protected ...@@ -212,7 +214,7 @@ protected
mapfile = path_to '_map' mapfile = path_to '_map'
if File.exist? mapfile if File.exist? mapfile
require mapfile require mapfile
elsif $DEBUG elsif $VERBOSE
warn 'no _map.rb found for %s' % name warn 'no _map.rb found for %s' % name
end end
end end
...@@ -241,7 +243,7 @@ protected ...@@ -241,7 +243,7 @@ protected
id id
elsif id.is_a? String elsif id.is_a? String
if id[/\w+/] == id if id[/\w+/] == id
id.to_sym id.downcase.to_sym
else else
raise ArgumentError, "Invalid id: '#{id}' given." raise ArgumentError, "Invalid id: '#{id}' given."
end end
...@@ -278,6 +280,16 @@ module Plugin ...@@ -278,6 +280,16 @@ module Plugin
def register_for *ids def register_for *ids
plugin_host.register self, *ids plugin_host.register self, *ids
end end
# Returns the title of the plugin, or sets it to the
# optional argument +title+.
def title title = nil
if title
@title = title.to_s
else
@title ||= name[/([^:]+)$/, 1]
end
end
# The host for this Plugin class. # The host for this Plugin class.
def plugin_host host = nil def plugin_host host = nil
...@@ -299,15 +311,23 @@ module Plugin ...@@ -299,15 +311,23 @@ module Plugin
# #
# The above example loads the file myplugin/my_helper.rb relative to the # The above example loads the file myplugin/my_helper.rb relative to the
# file in which MyPlugin was defined. # file in which MyPlugin was defined.
#
# You can also load a helper from a different plugin:
#
# helper 'other_plugin/helper_name'
def helper *helpers def helper *helpers
for helper in helpers for helper in helpers
self::PLUGIN_HOST.require_helper plugin_id, helper.to_s if helper.is_a?(String) && helper[/\//]
self::PLUGIN_HOST.require_helper $`, $'
else
self::PLUGIN_HOST.require_helper plugin_id, helper.to_s
end
end end
end end
# Returns the plugin id used by the engine. # Returns the plugin id used by the engine.
def plugin_id def plugin_id
name[/[\w_]+$/].downcase name[/\w+$/].downcase
end end
end end
...@@ -318,7 +338,7 @@ end ...@@ -318,7 +338,7 @@ end
# CodeRay.require_plugin '<Host ID>/<Plugin ID>' # CodeRay.require_plugin '<Host ID>/<Plugin ID>'
# #
# Returns the loaded plugin. # Returns the loaded plugin.
def require_plugin path def self.require_plugin path
host_id, plugin_id = path.split '/', 2 host_id, plugin_id = path.split '/', 2
host = PluginHost.host_by_id(host_id) host = PluginHost.host_by_id(host_id)
raise PluginHost::HostNotFound, raise PluginHost::HostNotFound,
......
...@@ -98,15 +98,22 @@ class CaseIgnoringWordList < WordList ...@@ -98,15 +98,22 @@ class CaseIgnoringWordList < WordList
# Creates a new case-insensitive WordList with +default+ as default value. # Creates a new case-insensitive WordList with +default+ as default value.
# #
# You can activate caching to store the results for every [] request. # You can activate caching to store the results for every [] request.
# This speeds up subsequent lookups for the same word, but also
# uses memory.
def initialize default = false, caching = false def initialize default = false, caching = false
if caching if caching
super(default, false) do |h, k| super(default, false) do |h, k|
h[k] = h.fetch k.downcase, default h[k] = h.fetch k.downcase, default
end end
else else
def self.[] key # :nodoc: super(default, false)
super(key.downcase) extend Uncached
end end
end
module Uncached # :nodoc:
def [] key
super(key.downcase)
end end
end end
...@@ -120,4 +127,12 @@ class CaseIgnoringWordList < WordList ...@@ -120,4 +127,12 @@ class CaseIgnoringWordList < WordList
end end
end end
\ No newline at end of file
__END__
# check memory consumption
END {
ObjectSpace.each_object(CodeRay::CaseIgnoringWordList) do |wl|
p wl.inject(0) { |memo, key, value| memo + key.size + 24 }
end
}
\ No newline at end of file
...@@ -4,8 +4,6 @@ module CodeRay ...@@ -4,8 +4,6 @@ module CodeRay
# = Scanners # = Scanners
# #
# $Id: scanner.rb 222 2007-01-01 16:26:17Z murphy $
#
# This module holds the Scanner class and its subclasses. # This module holds the Scanner class and its subclasses.
# For example, the Ruby scanner is named CodeRay::Scanners::Ruby # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
# can be found in coderay/scanners/ruby. # can be found in coderay/scanners/ruby.
...@@ -45,6 +43,7 @@ module CodeRay ...@@ -45,6 +43,7 @@ module CodeRay
# You can also use +map+, +any?+, +find+ and even +sort_by+, # You can also use +map+, +any?+, +find+ and even +sort_by+,
# if you want. # if you want.
class Scanner < StringScanner class Scanner < StringScanner
extend Plugin extend Plugin
plugin_host Scanners plugin_host Scanners
...@@ -57,6 +56,8 @@ module CodeRay ...@@ -57,6 +56,8 @@ module CodeRay
# #
# Define @default_options for subclasses. # Define @default_options for subclasses.
DEFAULT_OPTIONS = { :stream => false } DEFAULT_OPTIONS = { :stream => false }
KINDS_NOT_LOC = [:comment, :doctype]
class << self class << self
...@@ -66,7 +67,19 @@ module CodeRay ...@@ -66,7 +67,19 @@ module CodeRay
end end
def normify code def normify code
code = code.to_s.to_unix code = code.to_s
if code.respond_to? :force_encoding
debug, $DEBUG = $DEBUG, false
begin
code.force_encoding 'utf-8'
code[/\z/] # raises an ArgumentError when code contains a non-UTF-8 char
rescue ArgumentError
code.force_encoding 'binary'
ensure
$DEBUG = debug
end
end
code.to_unix
end end
def file_extension extension = nil def file_extension extension = nil
...@@ -75,7 +88,7 @@ module CodeRay ...@@ -75,7 +88,7 @@ module CodeRay
else else
@file_extension ||= plugin_id.to_s @file_extension ||= plugin_id.to_s
end end
end end
end end
...@@ -104,9 +117,10 @@ module CodeRay ...@@ -104,9 +117,10 @@ module CodeRay
# #
# Else, a Tokens object is used. # Else, a Tokens object is used.
def initialize code='', options = {}, &block def initialize code='', options = {}, &block
@options = self.class::DEFAULT_OPTIONS.merge options
raise "I am only the basic Scanner class. I can't scan "\ raise "I am only the basic Scanner class. I can't scan "\
"anything. :( Use my subclasses." if self.class == Scanner "anything. :( Use my subclasses." if self.class == Scanner
@options = self.class::DEFAULT_OPTIONS.merge options
super Scanner.normify(code) super Scanner.normify(code)
...@@ -121,6 +135,7 @@ module CodeRay ...@@ -121,6 +135,7 @@ module CodeRay
"but :stream is #{@options[:stream]}" if block_given? "but :stream is #{@options[:stream]}" if block_given?
@tokens ||= Tokens.new @tokens ||= Tokens.new
end end
@tokens.scanner = self
setup setup
end end
...@@ -140,6 +155,11 @@ module CodeRay ...@@ -140,6 +155,11 @@ module CodeRay
alias code string alias code string
alias code= string= alias code= string=
# Returns the plugin id of this scanner's class.
def lang
  scanner_class = self.class
  return scanner_class.plugin_id
end
# Scans the code and returns all tokens in a Tokens object. # Scans the code and returns all tokens in a Tokens object.
def tokenize new_string=nil, options = {} def tokenize new_string=nil, options = {}
options = @options.merge(options) options = @options.merge(options)
...@@ -178,6 +198,24 @@ module CodeRay ...@@ -178,6 +198,24 @@ module CodeRay
def line def line
string[0..pos].count("\n") + 1 string[0..pos].count("\n") + 1
end end
# Returns the column of +pos+ (default: the current scan position)
# within its line.
#
# Columns are counted from 1 on every line: the result is the distance
# between +pos+ and the closest line break at or before it, where a
# missing line break (first line) is treated as sitting at index -1.
# A position that is itself on a line break therefore reports 0, which
# matches +line+, whose inclusive range already counts that break as part
# of the following line.
#
# Previously the first line was counted from 0 while every later line was
# counted from 1.
#
# +pos+ is a byte offset, so when the string contains multibyte characters
# a binary copy is searched instead (and cached in @bin_string).
def column pos = self.pos
  # Negative offsets would make rindex count from the end of the string.
  pos = 0 if pos < 0
  string = string()
  if string.respond_to?(:bytesize) && (defined?(@bin_string) || string.bytesize != string.size)
    @bin_string ||= string.dup.force_encoding('binary')
    string = @bin_string
  end
  pos - (string.rindex(?\n, pos) || -1)
end
# Marshal hook: the options hash is the only state that gets dumped.
def marshal_dump()
  return @options
end
# Marshal hook: takes the dumped value back as this object's options.
def marshal_load(options)
  @options = options
end
protected protected
...@@ -202,6 +240,7 @@ module CodeRay ...@@ -202,6 +240,7 @@ module CodeRay
def reset_instance def reset_instance
@tokens.clear unless @options[:keep_tokens] @tokens.clear unless @options[:keep_tokens]
@cached_tokens = nil @cached_tokens = nil
@bin_string = nil if defined? @bin_string
end end
# Scanner error with additional status information # Scanner error with additional status information
...@@ -214,7 +253,7 @@ module CodeRay ...@@ -214,7 +253,7 @@ module CodeRay
tokens: tokens:
%s %s
current line: %d pos = %d current line: %d column: %d pos: %d
matched: %p state: %p matched: %p state: %p
bol? = %p, eos? = %p bol? = %p, eos? = %p
...@@ -229,10 +268,10 @@ surrounding code: ...@@ -229,10 +268,10 @@ surrounding code:
msg, msg,
tokens.size, tokens.size,
tokens.last(10).map { |t| t.inspect }.join("\n"), tokens.last(10).map { |t| t.inspect }.join("\n"),
line, pos, line, column, pos,
matched, state, bol?, eos?, matched, state, bol?, eos?,
string[pos-ambit,ambit], string[pos - ambit, ambit],
string[pos,ambit], string[pos, ambit],
] ]
end end
......
module CodeRay module CodeRay
module Scanners module Scanners
map :cpp => :c, map \
:plain => :plaintext, :h => :c,
:pascal => :delphi, :cplusplus => :cpp,
:'c++' => :cpp,
:ecma => :java_script,
:ecmascript => :java_script,
:ecma_script => :java_script,
:irb => :ruby, :irb => :ruby,
:xml => :html, :javascript => :java_script,
:xhtml => :nitro_xhtml, :js => :java_script,
:nitro => :nitro_xhtml :nitro => :nitro_xhtml,
:pascal => :delphi,
:plain => :plaintext,
:xhtml => :html,
:yml => :yaml
default :plain default :plain
......
...@@ -3,42 +3,50 @@ module Scanners ...@@ -3,42 +3,50 @@ module Scanners
class C < Scanner class C < Scanner
register_for :c
include Streamable include Streamable
register_for :c
file_extension 'c'
RESERVED_WORDS = [ RESERVED_WORDS = [
'asm', 'break', 'case', 'continue', 'default', 'do', 'else', 'asm', 'break', 'case', 'continue', 'default', 'do',
'for', 'goto', 'if', 'return', 'switch', 'while', 'else', 'enum', 'for', 'goto', 'if', 'return',
'struct', 'union', 'enum', 'typedef', 'sizeof', 'struct', 'switch', 'typedef', 'union', 'while',
'static', 'register', 'auto', 'extern', 'restrict', # added in C99
'sizeof',
'volatile', 'const', # C89
'inline', 'restrict', # C99
] ]
PREDEFINED_TYPES = [ PREDEFINED_TYPES = [
'int', 'long', 'short', 'char', 'void', 'int', 'long', 'short', 'char',
'signed', 'unsigned', 'float', 'double', 'signed', 'unsigned', 'float', 'double',
'bool', 'complex', # C99 'bool', 'complex', # added in C99
] ]
PREDEFINED_CONSTANTS = [ PREDEFINED_CONSTANTS = [
'EOF', 'NULL', 'EOF', 'NULL',
'true', 'false', # C99 'true', 'false', # added in C99
]
DIRECTIVES = [
'auto', 'extern', 'register', 'static', 'void',
'const', 'volatile', # added in C89
'inline', # added in C99
] ]
IDENT_KIND = WordList.new(:ident). IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved). add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type). add(PREDEFINED_TYPES, :pre_type).
add(DIRECTIVES, :directive).
add(PREDEFINED_CONSTANTS, :pre_constant) add(PREDEFINED_CONSTANTS, :pre_constant)
ESCAPE = / [rbfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
def scan_tokens tokens, options def scan_tokens tokens, options
state = :initial state = :initial
label_expected = true
case_expected = false
label_expected_before_preproc_line = nil
in_preproc_line = false
until eos? until eos?
...@@ -49,8 +57,13 @@ module Scanners ...@@ -49,8 +57,13 @@ module Scanners
when :initial when :initial
if scan(/ \s+ | \\\n /x) if match = scan(/ \s+ | \\\n /x)
kind = :space if in_preproc_line && match != "\\\n" && match.index(?\n)
in_preproc_line = false
label_expected = label_expected_before_preproc_line
end
tokens << [match, :space]
next
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx) elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment kind = :comment
...@@ -59,16 +72,32 @@ module Scanners ...@@ -59,16 +72,32 @@ module Scanners
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos? match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment kind = :comment
elsif scan(/ [-+*\/=<>?:;,!&^|()\[\]{}~%]+ | \.(?!\d) /x) elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
label_expected = match =~ /[;\{\}]/
if case_expected
label_expected = true if match == ':'
case_expected = false
end
kind = :operator kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x) elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match] kind = IDENT_KIND[match]
if kind == :ident and check(/:(?!:)/) if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
match << scan(/:/)
kind = :label kind = :label
match << matched
else
label_expected = false
if kind == :reserved
case match
when 'case', 'default'
case_expected = true
end
end
end end
elsif scan(/\$/)
kind = :ident
elsif match = scan(/L?"/) elsif match = scan(/L?"/)
tokens << [:open, :string] tokens << [:open, :string]
if match[0] == ?L if match[0] == ?L
...@@ -78,23 +107,30 @@ module Scanners ...@@ -78,23 +107,30 @@ module Scanners
state = :string state = :string
kind = :delimiter kind = :delimiter
elsif scan(/#\s*(\w*)/) elsif scan(/#[ \t]*(\w*)/)
kind = :preprocessor # FIXME multiline preprocs kind = :preprocessor
in_preproc_line = true
label_expected_before_preproc_line = label_expected
state = :include_expected if self[1] == 'include' state = :include_expected if self[1] == 'include'
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox) elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
label_expected = false
kind = :char kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/) elsif scan(/0[xX][0-9A-Fa-f]+/)
label_expected = false
kind = :hex kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/) elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
label_expected = false
kind = :oct kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])/) elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
label_expected = false
kind = :integer kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/) elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
label_expected = false
kind = :float kind = :float
else else
...@@ -110,6 +146,7 @@ module Scanners ...@@ -110,6 +146,7 @@ module Scanners
tokens << ['"', :delimiter] tokens << ['"', :delimiter]
tokens << [:close, :string] tokens << [:close, :string]
state = :initial state = :initial
label_expected = false
next next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox) elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char kind = :char
...@@ -117,6 +154,7 @@ module Scanners ...@@ -117,6 +154,7 @@ module Scanners
tokens << [:close, :string] tokens << [:close, :string]
kind = :error kind = :error
state = :initial state = :initial
label_expected = false
else else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end end
...@@ -131,8 +169,8 @@ module Scanners ...@@ -131,8 +169,8 @@ module Scanners
state = :initial if match.index ?\n state = :initial if match.index ?\n
else else
getch state = :initial
kind = :error next
end end
...@@ -142,7 +180,7 @@ module Scanners ...@@ -142,7 +180,7 @@ module Scanners
end end
match ||= matched match ||= matched
if $DEBUG and not kind if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' % raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens [[match, kind], line], tokens
end end
......
module CodeRay
module Scanners
# Scanner for C++ source, registered under the plugin id :cpp.
#
# scan_tokens appends [text, kind] pairs to the token list; string
# literals are additionally bracketed by [:open, :string] and
# [:close, :string] markers.
class CPlusPlus < Scanner
include Streamable
register_for :cpp
file_extension 'cpp'
title 'C++'
# http://www.cppreference.com/wiki/keywords/start
RESERVED_WORDS = [
'and', 'and_eq', 'asm', 'bitand', 'bitor', 'break',
'case', 'catch', 'class', 'compl', 'const_cast',
'continue', 'default', 'delete', 'do', 'dynamic_cast', 'else',
'enum', 'export', 'for', 'goto', 'if', 'namespace', 'new',
'not', 'not_eq', 'or', 'or_eq', 'reinterpret_cast', 'return',
'sizeof', 'static_cast', 'struct', 'switch', 'template',
'throw', 'try', 'typedef', 'typeid', 'typename', 'union',
'while', 'xor', 'xor_eq'
]
PREDEFINED_TYPES = [
'bool', 'char', 'double', 'float', 'int', 'long',
'short', 'signed', 'unsigned', 'wchar_t', 'string'
]
PREDEFINED_CONSTANTS = [
'false', 'true',
'EOF', 'NULL',
]
PREDEFINED_VARIABLES = [
'this'
]
DIRECTIVES = [
'auto', 'const', 'explicit', 'extern', 'friend', 'inline', 'mutable', 'operator',
'private', 'protected', 'public', 'register', 'static', 'using', 'virtual', 'void',
'volatile'
]
# Maps an identifier to its token kind; words in none of the lists
# above get the default kind :ident.
IDENT_KIND = WordList.new(:ident).
add(RESERVED_WORDS, :reserved).
add(PREDEFINED_TYPES, :pre_type).
add(PREDEFINED_VARIABLES, :local_variable).
add(DIRECTIVES, :directive).
add(PREDEFINED_CONSTANTS, :pre_constant)
# What may follow a backslash inside a string or char literal.
ESCAPE = / [rbfntv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
# Tokenizes the scanner's input into +tokens+ and returns +tokens+.
#
# States: :initial (ordinary code), :string (inside a "..." literal),
# :include_expected (right after an #include directive) and
# :class_name_expected (right after the keyword 'class').
def scan_tokens tokens, options
state = :initial
# True while an identifier directly followed by a single ':' is read as
# a label. Re-enabled by operator tokens containing ';', '{' or '}' and
# by the ':' that ends a case/default clause; cleared by most other
# tokens.
label_expected = true
# Set after the keywords 'case' / 'default'; consumed by the next
# operator token.
case_expected = false
# label_expected is saved here when a '#' directive starts and restored
# when the directive's line ends.
label_expected_before_preproc_line = nil
in_preproc_line = false
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
# Whitespace containing a newline ends a preprocessor line, unless the
# match is exactly a backslash-newline continuation.
if in_preproc_line && match != "\\\n" && match.index(?\n)
in_preproc_line = false
label_expected = label_expected_before_preproc_line
end
tokens << [match, :space]
next
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
kind = :comment
elsif match = scan(/ \# \s* if \s* 0 /x)
# "#if 0" sections are emitted as one comment, up to and including the
# next #elif / #else / #endif line, or to the end of the input.
match << scan_until(/ ^\# (?:elif|else|endif) .*? $ | \z /xm) unless eos?
kind = :comment
elsif match = scan(/ [-+*=<>?:;,!&^|()\[\]{}~%]+ | \/=? | \.(?!\d) /x)
label_expected = match =~ /[;\{\}]/
if case_expected
label_expected = true if match == ':'
case_expected = false
end
kind = :operator
elsif match = scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = IDENT_KIND[match]
# A plain identifier followed by ':' (but not '::') is a label when one
# is expected and we are not inside a preprocessor line; the colon
# becomes part of the label token.
if kind == :ident && label_expected && !in_preproc_line && scan(/:(?!:)/)
kind = :label
match << matched
else
label_expected = false
if kind == :reserved
case match
when 'class'
state = :class_name_expected
when 'case', 'default'
case_expected = true
end
end
end
elsif scan(/\$/)
kind = :ident
elsif match = scan(/L?"/)
tokens << [:open, :string]
# The L prefix of a wide string literal is emitted as its own token.
if match[0] == ?L
tokens << ['L', :modifier]
match = '"'
end
state = :string
kind = :delimiter
elsif scan(/#[ \t]*(\w*)/)
kind = :preprocessor
in_preproc_line = true
label_expected_before_preproc_line = label_expected
state = :include_expected if self[1] == 'include'
elsif scan(/ L?' (?: [^\'\n\\] | \\ #{ESCAPE} )? '? /ox)
label_expected = false
kind = :char
elsif scan(/0[xX][0-9A-Fa-f]+/)
label_expected = false
kind = :hex
elsif scan(/(?:0[0-7]+)(?![89.eEfF])/)
label_expected = false
kind = :oct
elsif scan(/(?:\d+)(?![.eEfF])L?L?/)
label_expected = false
kind = :integer
elsif scan(/\d[fF]?|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
label_expected = false
kind = :float
else
# Anything unrecognized is consumed one character at a time as :error.
getch
kind = :error
end
when :string
if scan(/[^\\"]+/)
kind = :content
elsif scan(/"/)
tokens << ['"', :delimiter]
tokens << [:close, :string]
state = :initial
label_expected = false
next
elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif scan(/ \\ | $ /x)
# A backslash that starts no known escape, or an end of line: the
# string group is closed and the matched text is emitted as :error.
tokens << [:close, :string]
kind = :error
state = :initial
label_expected = false
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
when :include_expected
if scan(/<[^>\n]+>?|"[^"\n\\]*(?:\\.[^"\n\\]*)*"?/)
kind = :include
state = :initial
elsif match = scan(/\s+/)
kind = :space
# NOTE(review): a newline consumed here returns to :initial without
# clearing in_preproc_line (only the :initial whitespace branch does
# that) -- confirm this is intended for a bare "#include" line.
state = :initial if match.index ?\n
else
# Nothing include-like follows: rescan the same input in :initial.
state = :initial
next
end
when :class_name_expected
if scan(/ [A-Za-z_][A-Za-z_0-9]* /x)
kind = :class
state = :initial
elsif match = scan(/\s+/)
kind = :space
else
getch
kind = :error
state = :initial
end
else
raise_inspect 'Unknown state', tokens
end
# Branches that did not assign match rely on the text of the last scan.
match ||= matched
if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
# Close a string literal that is still open at the end of the input.
if state == :string
tokens << [:close, :string]
end
tokens
end
end
end
end
module CodeRay
module Scanners
class CSS < Scanner
register_for :css
KINDS_NOT_LOC = [
:comment,
:class, :pseudo_class, :type,
:constant, :directive,
:key, :value, :operator, :color, :float,
:error, :important,
]
# Regular expression building blocks for the CSS scanner.
#
# The constants build on each other through interpolation, so their order
# matters. Note that String and Class defined here shadow the core
# classes of the same name inside this module.
module RE
  # Any non-ASCII character. Written as a negated ASCII range: the former
  # /[\x80-\xFF]/ literal is rejected as an "invalid multibyte escape" by
  # Ruby 1.9+, while this form matches the same bytes on 1.8 and any
  # non-ASCII character on later versions.
  NonASCII = /[^\x00-\x7F]/
  Hex = /[0-9a-fA-F]/
  Unicode = /\\#{Hex}{1,6}(?:\r\n|\s)?/ # differs from standard because it allows uppercase hex too
  Escape = /#{Unicode}|\\[^\r\n\f0-9a-fA-F]/
  NMChar = /[-_a-zA-Z0-9]|#{NonASCII}|#{Escape}/
  NMStart = /[_a-zA-Z]|#{NonASCII}|#{Escape}/
  NL = /\r\n|\r|\n|\f/
  # Both string forms accept a missing closing quote.
  String1 = /"(?:[^\n\r\f\\"]|\\#{NL}|#{Escape})*"?/ # FIXME: buggy regexp
  String2 = /'(?:[^\n\r\f\\']|\\#{NL}|#{Escape})*'?/ # FIXME: buggy regexp
  String = /#{String1}|#{String2}/

  HexColor = /#(?:#{Hex}{6}|#{Hex}{3})/
  Color = /#{HexColor}/

  Num = /-?(?:[0-9]+|[0-9]*\.[0-9]+)/
  Name = /#{NMChar}+/
  Ident = /-?#{NMStart}#{NMChar}*/
  AtKeyword = /@#{Ident}/
  Percentage = /#{Num}%/

  reldimensions = %w[em ex px]
  absdimensions = %w[in cm mm pt pc]
  Unit = Regexp.union(*(reldimensions + absdimensions))

  Dimension = /#{Num}#{Unit}/

  # An unterminated comment runs to the end of the input.
  Comment = %r! /\* (?: .*? \*/ | .* ) !mx
  # url(...) / alpha(...), closing parenthesis optional. The escaped
  # parenthesis alternative has to come first: with the character class in
  # front, the backslash was consumed on its own and the function ended at
  # the escaped ')'.
  Function = /(?:url|alpha)\((?:\\\)|[^)\n\r\f])*\)?/

  Id = /##{Name}/
  Class = /\.#{Name}/
  PseudoClass = /:#{Name}/
  AttributeSelector = /\[[^\]]*\]?/
end
# Tokenizes the scanner's input as CSS into +tokens+ and returns +tokens+.
#
# Whitespace is handled first in every state. After that the rules of the
# current state (top of the +states+ stack) are tried, and only if they
# consume nothing do the state-independent rules below them apply.
def scan_tokens tokens, options
# Inside a block: false until a ':' is seen, true until the next ';',
# '{' or '}'. Decides whether an identifier is a :key or a :value.
value_expected = nil
# Stack of scanner states; '{', '/*' and '@media' push, '}' and '*/' pop.
states = [:initial]
until eos?
kind = nil
match = nil
if scan(/\s+/)
kind = :space
# The case expression itself is the elsif condition: every state branch
# evaluates to a truthy value when it consumed input and to nil when
# nothing matched, in which case the following elsif rules are tried.
elsif case states.last
when :initial, :media
if scan(/(?>#{RE::Ident})(?!\()|\*/ox)
kind = :type
elsif scan RE::Class
kind = :class
elsif scan RE::Id
kind = :constant
elsif scan RE::PseudoClass
kind = :pseudo_class
elsif match = scan(RE::AttributeSelector)
# TODO: Improve highlighting inside of attribute selectors.
# Emitted as a string group: '[', the inner text (if any) and, when
# present, the closing ']'.
tokens << [:open, :string]
tokens << [match[0,1], :delimiter]
tokens << [match[1..-2], :content] if match.size > 2
tokens << [match[-1,1], :delimiter] if match[-1] == ?]
tokens << [:close, :string]
next
elsif match = scan(/@media/)
kind = :directive
states.push :media_before_name
end
when :block
if scan(/(?>#{RE::Ident})(?!\()/ox)
if value_expected
kind = :value
else
kind = :key
end
end
when :media_before_name
if scan RE::Ident
kind = :type
states[-1] = :media_after_name
end
when :media_after_name
if scan(/\{/)
kind = :operator
states[-1] = :media
end
when :comment
if scan(/(?:[^*\s]|\*(?!\/))+/)
kind = :comment
elsif scan(/\*\//)
kind = :comment
states.pop
# NOTE(review): this branch looks unreachable, since the leading
# scan(/\s+/) above already consumes whitespace in every state.
elsif scan(/\s+/)
kind = :space
end
else
raise_inspect 'Unknown state', tokens
end
elsif scan(/\/\*/)
kind = :comment
states.push :comment
elsif scan(/\{/)
value_expected = false
kind = :operator
states.push :block
elsif scan(/\}/)
value_expected = false
# A '}' outside of a block or media section is an error token.
if states.last == :block || states.last == :media
kind = :operator
states.pop
else
kind = :error
end
elsif match = scan(/#{RE::String}/o)
# NOTE(review): RE::String1/String2 make the closing quote optional, and
# the last character is emitted as :delimiter without checking that it
# is a quote -- for an unterminated string that is presumably the last
# content character; confirm.
tokens << [:open, :string]
tokens << [match[0, 1], :delimiter]
tokens << [match[1..-2], :content] if match.size > 2
tokens << [match[-1, 1], :delimiter] if match.size >= 2
tokens << [:close, :string]
next
elsif match = scan(/#{RE::Function}/o)
# Emitted as a string group: "name(" as delimiter, the argument text as
# content and, if the function was closed, ")" as delimiter.
tokens << [:open, :string]
start = match[/^\w+\(/]
tokens << [start, :delimiter]
if match[-1] == ?)
tokens << [match[start.size..-2], :content]
tokens << [')', :delimiter]
else
tokens << [match[start.size..-1], :content]
end
tokens << [:close, :string]
next
elsif scan(/(?: #{RE::Dimension} | #{RE::Percentage} | #{RE::Num} )/ox)
kind = :float
elsif scan(/#{RE::Color}/o)
kind = :color
elsif scan(/! *important/)
kind = :important
elsif scan(/rgb\([^()\n]*\)?/)
kind = :color
elsif scan(/#{RE::AtKeyword}/o)
kind = :directive
elsif match = scan(/ [+>:;,.=()\/] /x)
if match == ':'
value_expected = true
elsif match == ';'
value_expected = false
end
kind = :operator
else
# Anything unrecognized is consumed one character at a time as :error.
getch
kind = :error
end
# Branches that did not assign match rely on the text of the last scan.
match ||= matched
if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
tokens << [match, kind]
end
tokens
end
end
end
end
...@@ -6,6 +6,8 @@ module Scanners ...@@ -6,6 +6,8 @@ module Scanners
include Streamable include Streamable
register_for :debug register_for :debug
file_extension 'raydebug'
title 'CodeRay Token Dump'
protected protected
def scan_tokens tokens, options def scan_tokens tokens, options
...@@ -30,8 +32,8 @@ module Scanners ...@@ -30,8 +32,8 @@ module Scanners
opened_tokens << kind opened_tokens << kind
match = :open match = :open
elsif scan(/ > /x) elsif !opened_tokens.empty? && scan(/ > /x)
kind = opened_tokens.pop kind = opened_tokens.pop || :error
match = :close match = :close
else else
...@@ -41,7 +43,7 @@ module Scanners ...@@ -41,7 +43,7 @@ module Scanners
end end
match ||= matched match ||= matched
if $DEBUG and not kind if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' % raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens [[match, kind], line], tokens
end end
......
...@@ -4,6 +4,7 @@ module Scanners ...@@ -4,6 +4,7 @@ module Scanners
class Delphi < Scanner class Delphi < Scanner
register_for :delphi register_for :delphi
file_extension 'pas'
RESERVED_WORDS = [ RESERVED_WORDS = [
'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class', 'and', 'array', 'as', 'at', 'asm', 'at', 'begin', 'case', 'class',
...@@ -29,11 +30,11 @@ module Scanners ...@@ -29,11 +30,11 @@ module Scanners
'virtual', 'write', 'writeonly' 'virtual', 'write', 'writeonly'
] ]
IDENT_KIND = CaseIgnoringWordList.new(:ident, caching=true). IDENT_KIND = CaseIgnoringWordList.new(:ident).
add(RESERVED_WORDS, :reserved). add(RESERVED_WORDS, :reserved).
add(DIRECTIVES, :directive) add(DIRECTIVES, :directive)
NAME_FOLLOWS = CaseIgnoringWordList.new(false, caching=true). NAME_FOLLOWS = CaseIgnoringWordList.new(false).
add(%w(procedure function .)) add(%w(procedure function .))
private private
...@@ -129,7 +130,7 @@ module Scanners ...@@ -129,7 +130,7 @@ module Scanners
end end
match ||= matched match ||= matched
if $DEBUG and not kind if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' % raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens, state [[match, kind], line], tokens, state
end end
......
module CodeRay
module Scanners

  # Scanner for diff output, registered under the plugin id :diff.
  #
  # Each recognized line is wrapped in [:begin_line, kind] and
  # [:end_line, kind] markers, where kind is :head, :change, :insert or
  # :delete. The line's prefix gets that kind, the rest of the line is
  # :plain. Lines starting with a space are emitted as :comment without
  # line markers.
  class Diff < Scanner

    register_for :diff
    title 'diff output'

    # Tokenizes the scanner's input into +tokens+ and returns +tokens+.
    #
    # States: :initial, and :added, which is entered after an "Added: "
    # line and lasts for as long as lines start with three spaces and '+'.
    def scan_tokens tokens, options

      # Kind of the line group that is currently open, or nil.
      line_kind = nil
      state = :initial

      until eos?
        kind = match = nil

        # A newline closes the open line group, whatever the state.
        if match = scan(/\n/)
          if line_kind
            tokens << [:end_line, line_kind]
            line_kind = nil
          end
          tokens << [match, :space]
          next
        end

        case state

        when :initial
          if match = scan(/--- |\+\+\+ |=+|_+/)
            tokens << [:begin_line, line_kind = :head]
            tokens << [match, :head]
            # Nothing after the prefix: no :plain token for this line.
            next unless match = scan(/.+/)
            kind = :plain
          elsif match = scan(/Index: |Property changes on: /)
            tokens << [:begin_line, line_kind = :head]
            tokens << [match, :head]
            next unless match = scan(/.+/)
            kind = :plain
          elsif match = scan(/Added: /)
            tokens << [:begin_line, line_kind = :head]
            tokens << [match, :head]
            next unless match = scan(/.+/)
            kind = :plain
            state = :added
          elsif match = scan(/\\ /)
            tokens << [:begin_line, line_kind = :change]
            tokens << [match, :change]
            next unless match = scan(/.+/)
            kind = :plain
          elsif scan(/(@@)((?>[^@\n]*))(@@)/)
            tokens << [:begin_line, line_kind = :change]
            tokens << [self[1], :change]
            tokens << [self[2], :plain]
            tokens << [self[3], :change]
            next unless match = scan(/.+/)
            kind = :plain
          elsif match = scan(/\+/)
            tokens << [:begin_line, line_kind = :insert]
            tokens << [match, :insert]
            next unless match = scan(/.+/)
            kind = :plain
          elsif match = scan(/-/)
            tokens << [:begin_line, line_kind = :delete]
            tokens << [match, :delete]
            next unless match = scan(/.+/)
            kind = :plain
          elsif scan(/ .*/)
            kind = :comment
          elsif scan(/.+/)
            tokens << [:begin_line, line_kind = :head]
            kind = :plain
          else
            # Was called without the token list (and with a typo in the
            # message), unlike every other raise_inspect call.
            raise_inspect 'else case reached', tokens, state
          end

        when :added
          if match = scan(/   \+/)
            tokens << [:begin_line, line_kind = :insert]
            tokens << [match, :insert]
            next unless match = scan(/.+/)
            kind = :plain
          else
            # Not an added line any more: rescan the input in :initial.
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', tokens, state

        end

        # Branches that did not assign match rely on the last scan.
        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]
      end

      # Close a line group left open by input without a final newline.
      tokens << [:end_line, line_kind] if line_kind
      tokens
    end

  end

end
end
module CodeRay
module Scanners
load :java
# Scanner for Groovy, registered under the plugin id :groovy.
#
# Subclasses the Java scanner (loaded above) and extends its IDENT_KIND
# word list with the Groovy-specific words below.
class Groovy < Java
include Streamable
register_for :groovy
# TODO: Check this!
GROOVY_KEYWORDS = %w[
as assert def in
]
# Keywords after which a value is expected, so that a following '/'
# opens a regexp instead of being scanned as an operator.
KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
case instanceof new return throw typeof while as assert in
]
GROOVY_MAGIC_VARIABLES = %w[ it ]
IDENT_KIND = Java::IDENT_KIND.dup.
add(GROOVY_KEYWORDS, :keyword).
add(GROOVY_MAGIC_VARIABLES, :local_variable)
ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x # no 4-byte unicode chars? U[a-fA-F0-9]{8}
REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x
# TODO: interpretation inside ', ", /
# Plain (escape-free, interpolation-free) content of a literal, keyed by
# the delimiter that opened it.
STRING_CONTENT_PATTERN = {
"'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
'"' => /[^\\$"\n]+/,
"'''" => /(?>[^\\']+|'(?!''))+/,
'"""' => /(?>[^\\$"]+|"(?!""))+/,
'/' => /[^\\$\/\n]+/,
}
# Tokenizes the scanner's input into +tokens+ and returns +tokens+.
#
# States: :initial (code), and :string, :multiline_string and :regexp for
# the inside of the respective literal. A "${" inside a literal switches
# back to :initial until the matching '}' is found.
def scan_tokens tokens, options
state = :initial
# One [state, string_delimiter, inline_block_paren_depth] entry per "${"
# block that is currently open; popped again on its closing '}'.
inline_block_stack = []
# Brace nesting depth inside the innermost open "${" block.
inline_block_paren_depth = nil
string_delimiter = nil
import_clause = class_name_follows = last_token = after_def = false
# Truthy where a value may start; :regexp directly after a '~' operator.
value_expected = true
until eos?
kind = nil
match = nil
case state
when :initial
if match = scan(/ \s+ | \\\n /x)
tokens << [match, :space]
if match.index ?\n
import_clause = after_def = false
value_expected = true unless value_expected
end
next
elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
value_expected = true
after_def = false
kind = :comment
elsif bol? && scan(/ \#!.* /x)
kind = :doctype
elsif import_clause && scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
after_def = value_expected = false
kind = :include
elsif match = scan(/ #{IDENT} | \[\] /ox)
kind = IDENT_KIND[match]
value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
# The looked-up kind is overridden by context: after '.', after 'class'
# (or 'as' within an import), after 'def' when '(' or '{' follows, and
# for a plain identifier directly followed by ':'.
if last_token == '.'
kind = :ident
elsif class_name_follows
kind = :class
class_name_follows = false
elsif after_def && check(/\s*[({]/)
kind = :method
after_def = false
elsif kind == :ident && last_token != '?' && check(/:/)
kind = :key
else
class_name_follows = true if match == 'class' || (import_clause && match == 'as')
import_clause = match == 'import'
after_def = true if match == 'def'
end
elsif scan(/;/)
import_clause = after_def = false
value_expected = true
kind = :operator
elsif scan(/\{/)
class_name_follows = after_def = false
value_expected = true
kind = :operator
if !inline_block_stack.empty?
inline_block_paren_depth += 1
end
# TODO: ~'...', ~"..." and ~/.../ style regexps
elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
&& | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
value_expected = true
value_expected = :regexp if match == '~'
after_def = false
kind = :operator
elsif match = scan(/ [)\]}] /x)
value_expected = after_def = false
if !inline_block_stack.empty? && match == '}'
inline_block_paren_depth -= 1
if inline_block_paren_depth == 0 # closing brace of inline block reached
tokens << [match, :inline_delimiter]
tokens << [:close, :inline]
# Resume the literal that contained the "${" block.
state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
next
end
end
kind = :operator
elsif check(/[\d.]/)
after_def = value_expected = false
if scan(/0[xX][0-9A-Fa-f]+/)
kind = :hex
elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
kind = :oct
elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
kind = :float
elsif scan(/\d+[lLgG]?/)
kind = :integer
end
elsif match = scan(/'''|"""/)
after_def = value_expected = false
state = :multiline_string
tokens << [:open, :string]
string_delimiter = match
kind = :delimiter
# TODO: record.'name'
elsif match = scan(/["']/)
after_def = value_expected = false
# NOTE(review): match can only be a quote character here, so the
# :regexp alternative appears to be dead code -- confirm.
state = match == '/' ? :regexp : :string
tokens << [:open, state]
string_delimiter = match
kind = :delimiter
elsif value_expected && (match = scan(/\//))
after_def = value_expected = false
tokens << [:open, :regexp]
state = :regexp
string_delimiter = '/'
kind = :delimiter
elsif scan(/ @ #{IDENT} /ox)
after_def = value_expected = false
kind = :annotation
elsif scan(/\//)
# A '/' where no value is expected is an operator, not a regexp start.
after_def = false
value_expected = true
kind = :operator
else
# Anything unrecognized is consumed one character at a time as :error.
getch
kind = :error
end
when :string, :regexp, :multiline_string
if scan(STRING_CONTENT_PATTERN[string_delimiter])
kind = :content
elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
tokens << [match, :delimiter]
if state == :regexp
# TODO: regexp modifiers? s, m, x, i?
modifiers = scan(/[ix]+/)
tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
end
# Multiline strings were opened as a :string group, so close them as one.
state = :string if state == :multiline_string
tokens << [:close, state]
string_delimiter = nil
after_def = value_expected = false
state = :initial
next
elsif (state == :string || state == :multiline_string) &&
(match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
# In literals opened with a single quote only \\ and \' are emitted as
# :char; every other escape sequence is plain :content.
if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
kind = :content
else
kind = :char
end
elsif state == :regexp && scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
kind = :char
elsif match = scan(/ \$ #{IDENT} /mox)
# "$name" interpolation: emitted as an inline group of its own.
tokens << [:open, :inline]
tokens << ['$', :inline_delimiter]
match = match[1..-1]
tokens << [match, IDENT_KIND[match]]
tokens << [:close, :inline]
next
elsif match = scan(/ \$ \{ /x)
# "${" interpolation: remember the current literal and scan the block
# contents as code until the matching '}'.
tokens << [:open, :inline]
tokens << ['${', :inline_delimiter]
inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
inline_block_paren_depth = 1
state = :initial
next
elsif scan(/ \$ /mx)
kind = :content
elsif scan(/ \\. /mx)
kind = :content
elsif scan(/ \\ | \n /x)
# A trailing backslash or a newline: the literal is closed and the
# matched text is emitted as :error.
tokens << [:close, state]
kind = :error
after_def = value_expected = false
state = :initial
else
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
end
else
raise_inspect 'Unknown state', tokens
end
# Branches that did not assign match rely on the text of the last scan.
match ||= matched
if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens
end
raise_inspect 'Empty token', tokens unless match
# last_token skips whitespace, comments and the doctype line.
last_token = match unless [:space, :comment, :doctype].include? kind
tokens << [match, kind]
end
# Close a literal that is still open at the end of the input.
if [:multiline_string, :string, :regexp].include? state
tokens << [:close, state]
end
tokens
end
end
end
end
...@@ -2,12 +2,17 @@ module CodeRay ...@@ -2,12 +2,17 @@ module CodeRay
module Scanners module Scanners
# HTML Scanner # HTML Scanner
#
# $Id$
class HTML < Scanner class HTML < Scanner
include Streamable include Streamable
register_for :html register_for :html
KINDS_NOT_LOC = [
:comment, :doctype, :preprocessor,
:tag, :attribute_name, :operator,
:attribute_value, :delimiter, :content,
:plain, :entity, :error
]
ATTR_NAME = /[\w.:-]+/ ATTR_NAME = /[\w.:-]+/
ATTR_VALUE_UNQUOTED = ATTR_NAME ATTR_VALUE_UNQUOTED = ATTR_NAME
...@@ -65,14 +70,14 @@ module Scanners ...@@ -65,14 +70,14 @@ module Scanners
if scan(/<!--.*?-->/m) if scan(/<!--.*?-->/m)
kind = :comment kind = :comment
elsif scan(/<!DOCTYPE.*?>/m) elsif scan(/<!DOCTYPE.*?>/m)
kind = :preprocessor kind = :doctype
elsif scan(/<\?xml.*?\?>/m) elsif scan(/<\?xml.*?\?>/m)
kind = :preprocessor kind = :preprocessor
elsif scan(/<\?.*?\?>|<%.*?%>/m) elsif scan(/<\?.*?\?>|<%.*?%>/m)
kind = :comment kind = :comment
elsif scan(/<\/[-\w_.:]*>/m) elsif scan(/<\/[-\w.:]*>/m)
kind = :tag kind = :tag
elsif match = scan(/<[-\w_.:]+>?/m) elsif match = scan(/<[-\w.:]+>?/m)
kind = :tag kind = :tag
state = :attribute unless match[-1] == ?> state = :attribute unless match[-1] == ?>
elsif scan(/[^<>&]+/) elsif scan(/[^<>&]+/)
...@@ -154,7 +159,7 @@ module Scanners ...@@ -154,7 +159,7 @@ module Scanners
end end
match ||= matched match ||= matched
if $DEBUG and not kind if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' % raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens, state [[match, kind], line], tokens, state
end end
......
module CodeRay
module Scanners

  # Scanner for Java source code.
  #
  # Emits [text, kind] tokens into the given token list. String and character
  # literals are wrapped in [:open, :string] ... [:close, :string] markers.
  class Java < Scanner

    include Streamable
    register_for :java
    helper :builtin_types

    # http://java.sun.com/docs/books/tutorial/java/nutsandbolts/_keywords.html
    KEYWORDS = %w[
      assert break case catch continue default do else
      finally for if instanceof import new package
      return switch throw try typeof while
      debugger export
    ]
    RESERVED = %w[ const goto ]
    CONSTANTS = %w[ false null true ]
    MAGIC_VARIABLES = %w[ this super ]
    TYPES = %w[
      boolean byte char class double enum float int interface long
      short void
    ] << '[]'  # because int[] should be highlighted as a type
    DIRECTIVES = %w[
      abstract extends final implements native private protected public
      static strictfp synchronized throws transient volatile
    ]

    # Maps an identifier to its token kind; unknown words are plain :ident.
    # Later additions win, so builtin types ending in Error/Exception are
    # reported as :exception rather than :pre_type.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(RESERVED, :reserved).
      add(CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(TYPES, :type).
      add(BuiltinTypes::List, :pre_type).
      add(BuiltinTypes::List.select { |builtin| builtin[/(Error|Exception)$/] }, :exception).
      add(DIRECTIVES, :directive)

    # Escape sequences (without the leading backslash) recognized in strings.
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x

    # Plain string content, keyed by the delimiter that opened the string.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }
    IDENT = /[a-zA-Z_][A-Za-z_0-9]*/

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  the token list to append to; it is also the return value.
    # options:: accepted for interface compatibility; not read here.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      # import_clause:       inside an "import ...;" statement
      # class_name_follows:  the previous word was "class" or "interface"
      # last_token_dot:      the previous non-space, non-comment token was "."
      import_clause = class_name_follows = last_token_dot = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            tokens << [match, :comment]
            next

          elsif import_clause && scan(/ #{IDENT} (?: \. #{IDENT} )* /ox)
            kind = :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            if last_token_dot
              # A word after "." is a member name, never a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            else
              import_clause = true if match == 'import'
              class_name_follows = true if match == 'class' || match == 'interface'
            end

          elsif scan(/ \.(?!\d) | [,?:()\[\]}] | -- | \+\+ | && | \|\| | \*\*=? | [-+*\/%^~&|<>=!]=? | <<<?=? | >>>?=? /x)
            kind = :operator

          elsif scan(/;/)
            import_clause = false
            kind = :operator

          elsif scan(/\{/)
            class_name_follows = false
            kind = :operator

          elsif check(/[\d.]/)
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              kind = :float
            elsif scan(/\d+[lL]?/)
              kind = :integer
            end

          elsif match = scan(/["']/)
            tokens << [:open, :string]
            state = :string
            string_delimiter = match
            kind = :delimiter

          elsif scan(/ @ #{IDENT} /ox)
            kind = :annotation

          else
            getch
            kind = :error

          end

        when :string
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            tokens << [match, :delimiter]
            tokens << [:close, state]
            string_delimiter = nil
            state = :initial
            next
          elsif state == :string && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Unterminated string (lone backslash or end of line): close the
            # group that was opened with [:open, :string] so that open/close
            # markers stay balanced, then flag the remainder as an error.
            tokens << [:close, state]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Input ended inside a string literal: close the open group.
      if state == :string
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end
module CodeRay
module Scanners

  # Scanner for JavaScript source code.
  #
  # Emits [text, kind] tokens into the given token list. Strings, regular
  # expression literals and quoted object keys are wrapped in
  # [:open, state] ... [:close, state] markers, where state is :string,
  # :regexp or :key. Inline XML literals are handed to the XML scanner.
  class JavaScript < Scanner

    include Streamable

    register_for :java_script
    file_extension 'js'

    # The actual JavaScript keywords.
    KEYWORDS = %w[
      break case catch continue default delete do else
      finally for function if in instanceof new
      return switch throw try typeof var void while with
    ]
    PREDEFINED_CONSTANTS = %w[
      false null true undefined
    ]

    MAGIC_VARIABLES = %w[ this arguments ]  # arguments was introduced in JavaScript 1.4

    # Keywords after which a value (and therefore possibly a regexp literal)
    # may follow.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case delete in instanceof new return throw typeof with
    ]

    # Reserved for future use.
    RESERVED_WORDS = %w[
      abstract boolean byte char class debugger double enum export extends
      final float goto implements import int interface long native package
      private protected public short static super synchronized throws transient
      volatile
    ]

    # Maps an identifier to its token kind; unknown words are plain :ident.
    IDENT_KIND = WordList.new(:ident).
      add(RESERVED_WORDS, :reserved).
      add(PREDEFINED_CONSTANTS, :pre_constant).
      add(MAGIC_VARIABLES, :local_variable).
      add(KEYWORDS, :keyword)

    # Escape sequences (without the leading backslash).
    ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} /x
    REGEXP_ESCAPE = / [bBdDsSwW] /x

    # Plain string/regexp content, keyed by the opening delimiter.
    STRING_CONTENT_PATTERN = {
      "'" => /[^\\']+/,
      '"' => /[^\\"]+/,
      '/' => /[^\\\/]+/,
    }

    # Look-ahead used right after an opening quote to decide whether the
    # quoted text is an object key, i.e. is followed by a colon. Note that
    # the closing quote is optional in these patterns.
    KEY_CHECK_PATTERN = {
      "'" => / [^\\']* (?: \\.? [^\\']* )* '? \s* : /x,
      '"' => / [^\\"]* (?: \\.? [^\\"]* )* "? \s* : /x,
    }

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  the token list to append to; it is also the return value.
    # options:: accepted for interface compatibility; not read here.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      # value_expected:    a "/" at this point starts a regexp literal rather
      #                    than being the division operator
      # key_expected:      the last operator ended in "{" or ",", so a name or
      #                    string followed by ":" is an object key
      # function_expected: the previous word was the keyword "function"
      value_expected = true
      key_expected = false
      function_expected = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            value_expected = true if !value_expected && match.index(?\n)
            tokens << [match, :space]
            next

          elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            kind = :comment

          elsif check(/\.?\d/)
            key_expected = value_expected = false
            if scan(/0[xX][0-9A-Fa-f]+/)
              kind = :hex
            elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
              kind = :oct
            elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
              kind = :float
            elsif scan(/\d+/)
              kind = :integer
            end

          elsif value_expected && match = scan(/<([[:alpha:]]\w*) (?: [^\/>]*\/> | .*?<\/\1>)/xim)
            # FIXME: scan over nested tags
            xml_scanner.tokenize match
            value_expected = false
            next

          elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
            value_expected = true
            last_operator = match[-1]
            key_expected = (last_operator == ?{) || (last_operator == ?,)
            function_expected = false
            kind = :operator

          elsif scan(/ [)\]}]+ /x)
            function_expected = key_expected = value_expected = false
            kind = :operator

          elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            # TODO: labels
            if kind == :ident
              if match.index(?$)  # $ allowed inside an identifier
                kind = :predefined
              elsif function_expected
                kind = :function
              elsif check(/\s*[=:]\s*function\b/)
                kind = :function
              elsif key_expected && check(/\s*:/)
                kind = :key
              end
            end
            function_expected = (kind == :keyword) && (match == 'function')
            key_expected = false

          elsif match = scan(/["']/)
            if key_expected && check(KEY_CHECK_PATTERN[match])
              state = :key
            else
              state = :string
            end
            tokens << [:open, state]
            string_delimiter = match
            kind = :delimiter

          elsif value_expected && (match = scan(/\/(?=\S)/))
            tokens << [:open, :regexp]
            state = :regexp
            string_delimiter = '/'
            kind = :delimiter

          elsif scan(/ \/ /x)
            value_expected = true
            key_expected = false
            kind = :operator

          else
            getch
            kind = :error

          end

        when :string, :regexp, :key
          if scan(STRING_CONTENT_PATTERN[string_delimiter])
            kind = :content
          elsif match = scan(/["'\/]/)
            tokens << [match, :delimiter]
            if state == :regexp
              modifiers = scan(/[gim]+/)
              tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
            end
            tokens << [:close, state]
            string_delimiter = nil
            key_expected = value_expected = false
            state = :initial
            next
          elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
              kind = :content
            else
              kind = :char
            end
          elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Unterminated literal: close the open group and flag an error.
            tokens << [:close, state]
            kind = :error
            key_expected = value_expected = false
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a literal: close the open group. :key must be
      # included because KEY_CHECK_PATTERN accepts a key without a closing
      # quote, so the input can end while still in the :key state.
      if [:string, :regexp, :key].include? state
        tokens << [:close, state]
      end

      tokens
    end

  protected

    # Resets this scanner and, if one has been created, the nested XML scanner.
    def reset_instance
      super
      @xml_scanner.reset if defined? @xml_scanner
    end

    # Lazily created XML scanner that appends to this scanner's token list.
    def xml_scanner
      @xml_scanner ||= CodeRay.scanner :xml, :tokens => @tokens, :keep_tokens => true, :keep_state => true
    end

  end

end
end
module CodeRay
module Scanners

  # Scanner for JSON documents.
  #
  # Emits [text, kind] tokens into the given token list. Quoted text is
  # wrapped in [:open, state] ... [:close, state] markers, where state is
  # :key for object member names and :string for all other strings.
  class JSON < Scanner

    include Streamable

    register_for :json
    file_extension 'json'

    KINDS_NOT_LOC = [
      :float, :char, :content, :delimiter,
      :error, :integer, :operator, :value,
    ]

    CONSTANTS = %w( true false null )
    IDENT_KIND = WordList.new(:key).add(CONSTANTS, :value)

    # Escape sequences (without the leading backslash).
    ESCAPE = / [bfnrt\\"\/] /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  the token list to append to; it is also the return value.
    # options:: accepted for interface compatibility; not read here.
    def scan_tokens tokens, options

      state = :initial
      # Nesting of enclosing containers, as :object / :array entries.
      stack = []
      # True when the next string is an object member name.
      key_expected = false

      until eos?

        kind = nil
        match = nil

        case state

        when :initial
          if match = scan(/ \s+ | \\\n /x)
            tokens << [match, :space]
            next
          elsif match = scan(/ [:,\[{\]}] /x)
            kind = :operator
            case match
            when '{' then stack << :object; key_expected = true
            when '[' then stack << :array
            when ':' then key_expected = false
            when ',' then key_expected = true if stack.last == :object
            when '}', ']' then stack.pop  # no error recovery, but works for valid JSON
            end
          elsif match = scan(/ true | false | null /x)
            kind = IDENT_KIND[match]
          elsif match = scan(/-?(?:0|[1-9]\d*)/)
            kind = :integer
            # A fraction and/or exponent turns the number into a float.
            if scan(/\.\d+(?:[eE][-+]?\d+)?|[eE][-+]?\d+/)
              match << matched
              kind = :float
            end
          elsif match = scan(/"/)
            state = key_expected ? :key : :string
            tokens << [:open, state]
            kind = :delimiter
          else
            getch
            kind = :error
          end

        when :string, :key
          if scan(/[^\\"]+/)
            kind = :content
          elsif scan(/"/)
            tokens << ['"', :delimiter]
            tokens << [:close, state]
            state = :initial
            next
          elsif scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            kind = :char
          elsif scan(/\\./m)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Unterminated string (lone backslash or end of line): close the
            # group that was opened with [:open, state] so that open/close
            # markers stay balanced, then flag the remainder as an error.
            tokens << [:close, state]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens
          end

        else
          raise_inspect 'Unknown state', tokens

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens
        end
        raise_inspect 'Empty token', tokens unless match

        tokens << [match, kind]

      end

      # Input ended inside a string or key: close the open group.
      if [:string, :key].include? state
        tokens << [:close, state]
      end

      tokens
    end

  end

end
end
...@@ -5,13 +5,15 @@ module Scanners ...@@ -5,13 +5,15 @@ module Scanners
load :ruby load :ruby
# Nitro XHTML Scanner # Nitro XHTML Scanner
#
# $Id$
class NitroXHTML < Scanner class NitroXHTML < Scanner
include Streamable include Streamable
register_for :nitro_xhtml register_for :nitro_xhtml
file_extension :xhtml
title 'Nitro XHTML'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
NITRO_RUBY_BLOCK = / NITRO_RUBY_BLOCK = /
<\?r <\?r
(?> (?>
...@@ -119,6 +121,7 @@ module Scanners ...@@ -119,6 +121,7 @@ module Scanners
else else
raise_inspect 'else-case reached!', tokens raise_inspect 'else-case reached!', tokens
end end
end end
......
This diff is collapsed.
...@@ -4,9 +4,12 @@ module Scanners ...@@ -4,9 +4,12 @@ module Scanners
class Plaintext < Scanner class Plaintext < Scanner
register_for :plaintext, :plain register_for :plaintext, :plain
title 'Plain text'
include Streamable include Streamable
KINDS_NOT_LOC = [:plain]
def scan_tokens tokens, options def scan_tokens tokens, options
text = (scan_until(/\z/) || '') text = (scan_until(/\z/) || '')
tokens << [text, :plain] tokens << [text, :plain]
......
module CodeRay
module Scanners

  # Scanner for Python source code.
  #
  # Bases on pygments' PythonLexer, see
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  #
  # Emits [text, kind] tokens into the given token list. String literals are
  # wrapped in [:open, :string] ... [:close, :string] markers.
  class Python < Scanner

    include Streamable

    register_for :python
    file_extension 'py'

    KEYWORDS = [
      'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
      'del', 'elif', 'else', 'except', 'finally', 'for',
      'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
      'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
      'nonlocal',  # new in Python 3
    ]

    OLD_KEYWORDS = [
      'exec', 'print',  # gone in Python 3
    ]

    PREDEFINED_METHODS_AND_TYPES = %w[
      __import__ abs all any apply basestring bin bool buffer
      bytearray bytes callable chr classmethod cmp coerce compile
      complex delattr dict dir divmod enumerate eval execfile exit
      file filter float frozenset getattr globals hasattr hash hex id
      input int intern isinstance issubclass iter len list locals
      long map max min next object oct open ord pow property range
      raw_input reduce reload repr reversed round set setattr slice
      sorted staticmethod str sum super tuple type unichr unicode
      vars xrange zip
    ]

    PREDEFINED_EXCEPTIONS = %w[
      ArithmeticError AssertionError AttributeError
      BaseException DeprecationWarning EOFError EnvironmentError
      Exception FloatingPointError FutureWarning GeneratorExit IOError
      ImportError ImportWarning IndentationError IndexError KeyError
      KeyboardInterrupt LookupError MemoryError NameError
      NotImplemented NotImplementedError OSError OverflowError
      OverflowWarning PendingDeprecationWarning ReferenceError
      RuntimeError RuntimeWarning StandardError StopIteration
      SyntaxError SyntaxWarning SystemError SystemExit TabError
      TypeError UnboundLocalError UnicodeDecodeError
      UnicodeEncodeError UnicodeError UnicodeTranslateError
      UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
    ]

    PREDEFINED_VARIABLES_AND_CONSTANTS = [
      'False', 'True', 'None',  # "keywords" since Python 3
      'self', 'Ellipsis', 'NotImplemented',
    ]

    # Maps an identifier to its token kind; unknown words are plain :ident.
    IDENT_KIND = WordList.new(:ident).
      add(KEYWORDS, :keyword).
      add(OLD_KEYWORDS, :old_keyword).
      add(PREDEFINED_METHODS_AND_TYPES, :predefined).
      add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
      add(PREDEFINED_EXCEPTIONS, :exception)

    NAME = / [^\W\d] \w* /x
    # Escape sequences (without the leading backslash).
    ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x

    OPERATOR = /
      \.\.\. |          # ellipsis
      \.(?!\d) |        # dot but not decimal point
      [,;:()\[\]{}] |   # simple delimiters
      \/\/=? | \*\*=? | # special math
      [-+*\/%&|^]=? |   # ordinary math and binary logic
      [~`] |            # binary complement and inspection
      <<=? | >>=? | [<>=]=? | !=  # comparison and assignment
    /x

    # Lazily built regexp that matches the closing delimiter of a string,
    # cached per delimiter.
    STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = Regexp.union delimiter
    end

    # Lazily built regexp that matches a run of plain string content up to
    # the next backslash, end of line or delimiter, cached per delimiter.
    STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
      h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
    end

    # Scanner state entered after a keyword; :initial for all other keywords.
    DEF_NEW_STATE = WordList.new(:initial).
      add(%w(def), :def_expected).
      add(%w(import from), :include_expected).
      add(%w(class), :class_expected)

    # A dotted name or "*", as it appears in import statements.
    DESCRIPTOR = /
      #{NAME}
      (?: \. #{NAME} )*
      | \*
    /x

    # Tokenizes the scanner's input and appends the tokens to +tokens+.
    #
    # tokens::  the token list to append to; it is also the return value.
    # options:: accepted for interface compatibility; not read here.
    def scan_tokens tokens, options

      state = :initial
      string_delimiter = nil
      string_raw = false
      # True when the previous non-space, non-comment token was ".".
      last_token_dot = false
      unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
      # Words seen so far in the current import statement
      # (:from, :import, :as).
      from_import_state = []

      until eos?

        kind = nil
        match = nil

        if state == :string
          if scan(STRING_DELIMITER_REGEXP[string_delimiter])
            tokens << [matched, :delimiter]
            tokens << [:close, :string]
            state = :initial
            next
          elsif string_delimiter.size == 3 && scan(/\n/)
            # Only triple-quoted strings may span lines.
            kind = :content
          elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
            kind = :content
          elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
            kind = :char
          elsif scan(/ \\ . /x)
            kind = :content
          elsif scan(/ \\ | $ /x)
            # Unterminated string: close the open group and flag an error.
            tokens << [:close, :string]
            kind = :error
            state = :initial
          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
          end

        elsif match = scan(/ [ \t]+ | \\\n /x)
          tokens << [match, :space]
          next

        elsif match = scan(/\n/)
          tokens << [match, :space]
          if state == :include_expected
            # A bare newline ends the import statement; forget its words so
            # they cannot influence how later import lines are classified.
            state = :initial
            from_import_state = []
          end
          next

        elsif match = scan(/ \# [^\n]* /mx)
          tokens << [match, :comment]
          next

        elsif state == :initial

          if scan(/#{OPERATOR}/o)
            kind = :operator

          elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
            tokens << [:open, :string]
            string_delimiter = self[2]
            string_raw = false
            modifiers = self[1]
            unless modifiers.empty?
              string_raw = !!modifiers.index(?r)
              tokens << [modifiers, :modifier]
              match = string_delimiter
            end
            state = :string
            kind = :delimiter

          # TODO: backticks

          elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = IDENT_KIND[match]
            # TODO: keyword arguments
            kind = :ident if last_token_dot
            if kind == :old_keyword
              # "print(...)" / "exec(...)" are function calls, not statements.
              kind = check(/\(/) ? :ident : :keyword
            elsif kind == :predefined && check(/ *=/)
              kind = :ident
            elsif kind == :keyword
              state = DEF_NEW_STATE[match]
              from_import_state << match.to_sym if state == :include_expected
            end

          elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
            kind = :decorator

          elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
            kind = :hex

          elsif scan(/0[bB][01]+[lL]?/)
            kind = :bin

          elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
            kind = :float
            if scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
            kind = :oct

          elsif match = scan(/\d+([lL])?/)
            kind = :integer
            # A long suffix and an imaginary suffix cannot be combined.
            if self[1] == nil && scan(/[jJ]/)
              match << matched
              kind = :imaginary
            end

          else
            getch
            kind = :error

          end

        elsif state == :def_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :method
          else
            next
          end

        elsif state == :class_expected
          state = :initial
          if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
            kind = :class
          else
            next
          end

        elsif state == :include_expected
          if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
            kind = :include
            if match == 'as'
              kind = :keyword
              from_import_state << :as
            elsif from_import_state.first == :from && match == 'import'
              kind = :keyword
              from_import_state << :import
            elsif from_import_state.last == :as
              # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
              kind = :ident
              from_import_state.pop
            elsif IDENT_KIND[match] == :keyword
              # Any other keyword ends the import statement; rescan it in
              # the :initial state.
              unscan
              match = nil
              state = :initial
              next
            end
          elsif match = scan(/,/)
            from_import_state.pop if from_import_state.last == :as
            kind = :operator
          else
            from_import_state = []
            state = :initial
            next
          end

        else
          raise_inspect 'Unknown state', tokens, state

        end

        match ||= matched
        if $CODERAY_DEBUG and not kind
          raise_inspect 'Error token %p in line %d' %
            [[match, kind], line], tokens, state
        end
        raise_inspect 'Empty token', tokens, state unless match

        last_token_dot = match == '.'

        tokens << [match, kind]

      end

      # Input ended inside a string literal: close the open group.
      if state == :string
        tokens << [:close, :string]
      end

      tokens
    end

  end

end
end
...@@ -5,12 +5,13 @@ module Scanners ...@@ -5,12 +5,13 @@ module Scanners
load :ruby load :ruby
# RHTML Scanner # RHTML Scanner
#
# $Id$
class RHTML < Scanner class RHTML < Scanner
include Streamable include Streamable
register_for :rhtml register_for :rhtml
title 'HTML ERB Template'
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
ERB_RUBY_BLOCK = / ERB_RUBY_BLOCK = /
<%(?!%)[=-]? <%(?!%)[=-]?
......
...@@ -5,8 +5,11 @@ module CodeRay ...@@ -5,8 +5,11 @@ module CodeRay
# Thanks to murphy for putting CodeRay into public. # Thanks to murphy for putting CodeRay into public.
class Scheme < Scanner class Scheme < Scanner
# TODO: function defs
# TODO: built-in functions
register_for :scheme register_for :scheme
file_extension :scm file_extension 'scm'
CORE_FORMS = %w[ CORE_FORMS = %w[
lambda let let* letrec syntax-case define-syntax let-syntax lambda let let* letrec syntax-case define-syntax let-syntax
...@@ -120,7 +123,7 @@ module CodeRay ...@@ -120,7 +123,7 @@ module CodeRay
end end
match ||= matched match ||= matched
if $DEBUG and not kind if $CODERAY_DEBUG and not kind
raise_inspect 'Error token %p in line %d' % raise_inspect 'Error token %p in line %d' %
[[match, kind], line], tokens [[match, kind], line], tokens
end end
......
This diff is collapsed.
...@@ -5,13 +5,12 @@ module Scanners ...@@ -5,13 +5,12 @@ module Scanners
# XML Scanner # XML Scanner
# #
# $Id$
#
# Currently this is the same scanner as Scanners::HTML. # Currently this is the same scanner as Scanners::HTML.
class XML < HTML class XML < HTML
register_for :xml register_for :xml
file_extension 'xml'
end end
end end
......
This diff is collapsed.
...@@ -8,7 +8,7 @@ module Styles ...@@ -8,7 +8,7 @@ module Styles
code_background = '#f8f8f8' code_background = '#f8f8f8'
numbers_background = '#def' numbers_background = '#def'
border_color = 'silver' border_color = 'silver'
normal_color = '#100' normal_color = '#000'
CSS_MAIN_STYLES = <<-MAIN CSS_MAIN_STYLES = <<-MAIN
.CodeRay { .CodeRay {
...@@ -32,6 +32,7 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top } ...@@ -32,6 +32,7 @@ table.CodeRay td { padding: 2px 4px; vertical-align: top }
text-align: right; text-align: right;
} }
.CodeRay .line_numbers tt { font-weight: bold } .CodeRay .line_numbers tt { font-weight: bold }
.CodeRay .line_numbers .highlighted { color: red }
.CodeRay .no { padding: 0px 4px } .CodeRay .no { padding: 0px 4px }
.CodeRay .code { width: 100% } .CodeRay .code { width: 100% }
...@@ -46,28 +47,32 @@ ol.CodeRay li { white-space: pre } ...@@ -46,28 +47,32 @@ ol.CodeRay li { white-space: pre }
.af { color:#00C } .af { color:#00C }
.an { color:#007 } .an { color:#007 }
.at { color:#f08 }
.av { color:#700 } .av { color:#700 }
.aw { color:#C00 } .aw { color:#C00 }
.bi { color:#509; font-weight:bold } .bi { color:#509; font-weight:bold }
.c { color:#666; } .c { color:#888; }
.ch { color:#04D } .ch { color:#04D }
.ch .k { color:#04D } .ch .k { color:#04D }
.ch .dl { color:#039 } .ch .dl { color:#039 }
.cl { color:#B06; font-weight:bold } .cl { color:#B06; font-weight:bold }
.cm { color:#A08; font-weight:bold }
.co { color:#036; font-weight:bold } .co { color:#036; font-weight:bold }
.cr { color:#0A0 } .cr { color:#0A0 }
.cv { color:#369 } .cv { color:#369 }
.de { color:#B0B; }
.df { color:#099; font-weight:bold } .df { color:#099; font-weight:bold }
.di { color:#088; font-weight:bold } .di { color:#088; font-weight:bold }
.dl { color:black } .dl { color:black }
.do { color:#970 } .do { color:#970 }
.dt { color:#34b }
.ds { color:#D42; font-weight:bold } .ds { color:#D42; font-weight:bold }
.e { color:#666; font-weight:bold } .e { color:#666; font-weight:bold }
.en { color:#800; font-weight:bold } .en { color:#800; font-weight:bold }
.er { color:#F00; background-color:#FAA } .er { color:#F00; background-color:#FAA }
.ex { color:#F00; font-weight:bold } .ex { color:#C00; font-weight:bold }
.fl { color:#60E; font-weight:bold } .fl { color:#60E; font-weight:bold }
.fu { color:#06B; font-weight:bold } .fu { color:#06B; font-weight:bold }
.gv { color:#d70; font-weight:bold } .gv { color:#d70; font-weight:bold }
...@@ -75,11 +80,13 @@ ol.CodeRay li { white-space: pre } ...@@ -75,11 +80,13 @@ ol.CodeRay li { white-space: pre }
.i { color:#00D; font-weight:bold } .i { color:#00D; font-weight:bold }
.ic { color:#B44; font-weight:bold } .ic { color:#B44; font-weight:bold }
.il { background: #eee } .il { background: #ddd; color: black }
.il .il { background: #ddd } .il .il { background: #ccc }
.il .il .il { background: #ccc } .il .il .il { background: #bbb }
.il .idl { font-weight: bold; color: #888 } .il .idl { background: #ddd; font-weight: bold; color: #666 }
.idl { background-color: #bbb; font-weight: bold; color: #666; }
.im { color:#f00; }
.in { color:#B2B; font-weight:bold } .in { color:#B2B; font-weight:bold }
.iv { color:#33B } .iv { color:#33B }
.la { color:#970; font-weight:bold } .la { color:#970; font-weight:bold }
...@@ -89,9 +96,15 @@ ol.CodeRay li { white-space: pre } ...@@ -89,9 +96,15 @@ ol.CodeRay li { white-space: pre }
.op { } .op { }
.pc { color:#038; font-weight:bold } .pc { color:#038; font-weight:bold }
.pd { color:#369; font-weight:bold } .pd { color:#369; font-weight:bold }
.pp { color:#579 } .pp { color:#579; }
.pt { color:#339; font-weight:bold } .ps { color:#00C; font-weight:bold }
.r { color:#080; font-weight:bold } .pt { color:#074; font-weight:bold }
.r, .kw { color:#080; font-weight:bold }
.ke { color: #808; }
.ke .dl { color: #606; }
.ke .ch { color: #80f; }
.vl { color: #088; }
.rx { background-color:#fff0ff } .rx { background-color:#fff0ff }
.rx .k { color:#808 } .rx .k { color:#808 }
...@@ -99,14 +112,15 @@ ol.CodeRay li { white-space: pre } ...@@ -99,14 +112,15 @@ ol.CodeRay li { white-space: pre }
.rx .mod { color:#C2C } .rx .mod { color:#C2C }
.rx .fu { color:#404; font-weight: bold } .rx .fu { color:#404; font-weight: bold }
.s { background-color:#fff0f0 } .s { background-color:#fff0f0; color: #D20; }
.s .s { background-color:#ffe0e0 } .s .s { background-color:#ffe0e0 }
.s .s .s { background-color:#ffd0d0 } .s .s .s { background-color:#ffd0d0 }
.s .k { color:#D20 } .s .k { }
.s .dl { color:#710 } .s .ch { color: #b0b; }
.s .dl { color: #710; }
.sh { background-color:#f0fff0 } .sh { background-color:#f0fff0; color:#2B2 }
.sh .k { color:#2B2 } .sh .k { }
.sh .dl { color:#161 } .sh .dl { color:#161 }
.sy { color:#A60 } .sy { color:#A60 }
...@@ -119,6 +133,16 @@ ol.CodeRay li { white-space: pre } ...@@ -119,6 +133,16 @@ ol.CodeRay li { white-space: pre }
.ty { color:#339; font-weight:bold } .ty { color:#339; font-weight:bold }
.v { color:#036 } .v { color:#036 }
.xt { color:#444 } .xt { color:#444 }
.ins { background: #afa; }
.del { background: #faa; }
.chg { color: #aaf; background: #007; }
.head { color: #f8f; background: #505 }
.ins .ins { color: #080; font-weight:bold }
.del .del { color: #800; font-weight:bold }
.chg .chg { color: #66f; }
.head .head { color: #f4f; }
TOKENS TOKENS
end end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment