2 # $Id: lexer.rb 13966 2007-11-19 07:10:09Z matz $
4 # Copyright (c) 2004,2005 Minero Aoki
6 # This program is free software.
7 # You can distribute and/or modify this program under the Ruby License.
8 # For details of Ruby License, see ruby/COPYING.
15 # Tokenizes a Ruby program and returns an Array of Strings.
#
# +src+, +filename+ (default '-') and +lineno+ (default 1) are passed
# unchanged to Lexer.new; the value of that Lexer's #tokenize call is
# returned.
16 def Ripper.tokenize(src, filename = '-', lineno = 1)
17 Lexer.new(src, filename, lineno).tokenize
20 # Tokenizes a Ruby program and returns an Array of Arrays,
21 # each of which is formatted like [[lineno, column], type, token].
#
# +src+, +filename+ (default '-') and +lineno+ (default 1) are passed
# unchanged to Lexer.new; the value of that Lexer's #lex call is returned.
#
# Example:
26 # p Ripper.lex("def m(a) nil end")
27 # #=> [[[1, 0], :on_kw, "def"],
28 # [[1, 3], :on_sp, " " ],
29 # [[1, 4], :on_ident, "m" ],
30 # [[1, 5], :on_lparen, "(" ],
31 # [[1, 6], :on_ident, "a" ],
32 # [[1, 7], :on_rparen, ")" ],
33 # [[1, 8], :on_sp, " " ],
34 # [[1, 9], :on_kw, "nil"],
35 # [[1, 12], :on_sp, " " ],
36 # [[1, 13], :on_kw, "end"]]
38 def Ripper.lex(src, filename = '-', lineno = 1)
39 Lexer.new(src, filename, lineno).lex
# Ripper subclass whose scanner event handlers record every token in @buf
# (internal use only).
42 class Lexer < ::Ripper #:nodoc: internal use only
# Reduce each [pos, event, tok] entry produced by lex to its token text.
44 lex().map {|pos, event, tok| tok }
# Order the [pos, event, tok] entries produced by parse by their position.
48 parse().sort_by {|pos, event, tok| pos }
# For every name in SCANNER_EVENTS, evaluate generated source whose body
# pushes [[lineno, column], :on_<name>, tok] onto @buf. The file name and
# line number arguments label the generated code for error reporting.
59 SCANNER_EVENTS.each do |event|
60 module_eval(<<-End, __FILE__+'/module_eval', __LINE__ + 1)
62 @buf.push [[lineno(), column()], :on_#{event}, tok]
69 # Parses +src+ and returns the string that matched +pattern+.
70 # +pattern+ is written in a Regexp-like notation whose words are token
# names without the on_ prefix (ident, lparen, nl, ...).
#
# +n+ defaults to 0. The last example passes 1 together with a $( ... )
# group, so +n+ presumably selects a group of the match -- TODO confirm
# against the part of this method that is not visible here.
#
# Examples:
74 # p Ripper.slice('def m(a) nil end', 'ident') #=> "m"
75 # p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+') #=> "m(a)"
76 # p Ripper.slice("<<EOS\nstring\nEOS",
77 # 'heredoc_beg nl $(tstring_content*) heredoc_end', 1)
80 def Ripper.slice(src, pattern, n = 0)
# Continue only when token_match returned a truthy match object.
81 if m = token_match(src, pattern)
# Compiles +pattern+ with TokenPattern.compile and matches the result
# against +src+, returning whatever TokenPattern#match returns.
87 def Ripper.token_match(src, pattern) #:nodoc:
88 TokenPattern.compile(pattern).match(src)
# Matches a pattern of token names against a lexed token list. Every token
# type is encoded as a single character, the token list becomes a String of
# those characters, and an ordinary Regexp is run over that String.
91 class TokenPattern #:nodoc:
# Error hierarchy: CompileError and MatchError both derive from Error,
# which is a StandardError.
93 class Error < ::StandardError; end
94 class CompileError < Error; end
95 class MatchError < Error; end
# Compiles +pattern+ and keeps the result in @re.
101 def initialize(pattern)
103 @re = compile(pattern)
# Lex +str+ with ::Ripper.lex and match the resulting token list.
107 match_list(::Ripper.lex(str))
# Encodes +tokens+ with map_tokens and matches @re against the encoding;
# on success the tokens and the regexp match are wrapped in a MatchData.
110 def match_list(tokens)
111 if m = @re.match(map_tokens(tokens))
112 then MatchData.new(tokens, m)
# Reject any character other than word characters, whitespace and
# $ ( ) [ ] { } ? * + .
120 if m = /[^\w\s$()\[\]{}?*+\.]/.match(pattern)
121 raise CompileError, "invalid char in pattern: #{m[0].inspect}"
# Walk the pattern piece by piece: a word, the "$(" marker, or a run of
# the permitted punctuation characters.
124 pattern.scan(/(?:\w+|\$\(|[()\[\]\{\}?*+\.]+)/) do |tok|
# Append the one-character code that map_token returns for this piece.
127 buf.concat map_token(tok)
# NOTE(review): presumably the fall-through arm for a piece no other arm
# handles -- the surrounding branches are not visible here.
135 raise 'must not happen'
# Report an invalid generated regexp as a CompileError, keeping its message.
139 rescue RegexpError => err
140 raise CompileError, err.message
# Turns a token list into a String with one code character per token; the
# on_ prefix is removed from each token type before the lookup.
143 def map_tokens(tokens)
144 tokens.map {|pos,type,str| map_token(type.to_s.sub(/\Aon_/,'')) }.join
# Pool of 62 single characters (a-z, A-Z, 0-9) available as token codes.
148 seed = ('a'..'z').to_a + ('A'..'Z').to_a + ('0'..'9').to_a
# Give each SCANNER_EVENT_TABLE key its own character, stored in MAP under
# the event name without the on_ prefix; raise if the pool runs out.
149 SCANNER_EVENT_TABLE.each do |ev, |
150 raise CompileError, "[RIPPER FATAL] too many system token" if seed.empty?
151 MAP[ev.to_s.sub(/\Aon_/,'')] = seed.shift
# Look up the code for +tok+; an unknown name raises CompileError.
155 MAP[tok] or raise CompileError, "unknown token: #{tok}"
# NOTE(review): +tokens+ and +match+ are presumably stored in @tokens and
# @match, which the lines below read -- the assignments are not visible.
159 def initialize(tokens, match)
# Without a regexp match the result is nil.
165 return nil unless @match
# Without a regexp match the result is an empty list.
172 return [] unless @match
# Group +n+'s begin/end offsets index the encoded String, which has one
# character per token, so they also index @tokens; keep each token's text.
173 @tokens[@match.begin(n)...@match.end(n)].map {|pos,type,str| str }