Extended maintenance of Ruby 1.9.3 ended on February 23, 2015. Read more
# File irb/ruby-lex.rb, line 39
def initialize
lex_init
set_input(STDIN)
@seek = 0
@exp_line_no = @line_no = 1
@base_char_no = 0
@char_no = 0
@rests = []
@readed = []
@here_readed = []
@indent = 0
@indent_stack = []
@lex_state = EXPR_BEG
@space_seen = false
@here_header = false
@continue = false
@line = ""
@skip_space = false
@readed_auto_clean_up = false
@exception_on_syntax_error = true
@prompt = nil
end
# File irb/ruby-lex.rb, line 226
def each_top_level_statement
initialize_input
catch(:TERM_INPUT) do
loop do
begin
@continue = false
prompt
unless l = lex
throw :TERM_INPUT if @line == ''
else
@line.concat l
if @ltype or @continue or @indent > 0
next
end
end
if @line != "\n"
@line.force_encoding(@io.encoding)
yield @line, @exp_line_no
end
break unless l
@line = ''
@exp_line_no = @line_no
@indent = 0
@indent_stack = []
prompt
rescue TerminateLineInput
initialize_input
prompt
get_readed
end
end
end
end
# File irb/ruby-lex.rb, line 88
def get_readed
if idx = @readed.reverse.index("\n")
@base_char_no = idx
else
@base_char_no += @readed.size
end
readed = @readed.join("")
@readed = []
readed
end
# File irb/ruby-lex.rb, line 100
def getc
while @rests.empty?
# return nil unless buf_input
@rests.push nil unless buf_input
end
c = @rests.shift
if @here_header
@here_readed.push c
else
@readed.push c
end
@seek += 1
if c == "\n"
@line_no += 1
@char_no = 0
else
@char_no += 1
end
c
end
# File irb/ruby-lex.rb, line 135
def getc_of_rests
if @rests.empty?
nil
else
getc
end
end
# File irb/ruby-lex.rb, line 121
def gets
l = ""
while c = getc
l.concat(c)
break if c == "\n"
end
return nil if l == "" and c.nil?
l
end
# File irb/ruby-lex.rb, line 1123
def identify_comment
@ltype = "#"
while ch = getc
# if ch == "\\" #"
# read_escape
# end
if ch == "\n"
@ltype = nil
ungetc
break
end
end
return Token(TkCOMMENT)
end
# File irb/ruby-lex.rb, line 740
def identify_gvar
@lex_state = EXPR_END
case ch = getc
when /[~_*$?!@\/\;,=:<>".]/ #"
Token(TkGVAR, "$" + ch)
when "-"
Token(TkGVAR, "$-" + getc)
when "&", "`", "'", "+"
Token(TkBACK_REF, "$"+ch)
when /[1-9]/
while getc =~ /[0-9]/; end
ungetc
Token(TkNTH_REF)
when /\w/
ungetc
ungetc
identify_identifier
else
ungetc
Token("$")
end
end
# File irb/ruby-lex.rb, line 873
def identify_here_document
ch = getc
# if lt = PERCENT_LTYPE[ch]
if ch == "-"
ch = getc
indent = true
end
if /['"`]/ =~ ch
lt = ch
quoted = ""
while (c = getc) && c != lt
quoted.concat c
end
else
lt = '"'
quoted = ch.dup
while (c = getc) && c =~ /\w/
quoted.concat c
end
ungetc
end
ltback, @ltype = @ltype, lt
reserve = []
while ch = getc
reserve.push ch
if ch == "\\"
reserve.push ch = getc
elsif ch == "\n"
break
end
end
@here_header = false
while l = gets
l = l.sub(/(:?\r)?\n\z/, '')
if (indent ? l.strip : l) == quoted
break
end
end
@here_header = true
@here_readed.concat reserve
while ch = reserve.pop
ungetc ch
end
@ltype = ltback
@lex_state = EXPR_END
Token(Ltype2Token[lt])
end
# File irb/ruby-lex.rb, line 764
def identify_identifier
token = ""
if peek(0) =~ /[$@]/
token.concat(c = getc)
if c == "@" and peek(0) == "@"
token.concat getc
end
end
while (ch = getc) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
print ":", ch, ":" if RubyLex.debug?
token.concat ch
end
ungetc
if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
token.concat getc
end
# almost fix token
case token
when /^\$/
return Token(TkGVAR, token)
when /^\@\@/
@lex_state = EXPR_END
# p Token(TkCVAR, token)
return Token(TkCVAR, token)
when /^\@/
@lex_state = EXPR_END
return Token(TkIVAR, token)
end
if @lex_state != EXPR_DOT
print token, "\n" if RubyLex.debug?
token_c, *trans = TkReading2Token[token]
if token_c
# reserved word?
if (@lex_state != EXPR_BEG &&
@lex_state != EXPR_FNAME &&
trans[1])
# modifiers
token_c = TkSymbol2Token[trans[1]]
@lex_state = trans[0]
else
if @lex_state != EXPR_FNAME
if ENINDENT_CLAUSE.include?(token)
# check for ``class = val'' etc.
valid = true
case token
when "class"
valid = false unless peek_match?(/^\s*(<<|\w|::)/)
when "def"
valid = false if peek_match?(/^\s*(([+\-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
when "do"
valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&)/)
when *ENINDENT_CLAUSE
valid = false if peek_match?(/^\s*([+\-\/*]?=|\*|<|>|\&|\|)/)
else
# no nothing
end
if valid
if token == "do"
if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
@indent += 1
@indent_stack.push token_c
end
else
@indent += 1
@indent_stack.push token_c
end
# p @indent_stack
end
elsif DEINDENT_CLAUSE.include?(token)
@indent -= 1
@indent_stack.pop
end
@lex_state = trans[0]
else
@lex_state = EXPR_END
end
end
return Token(token_c, token)
end
end
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
if peek(0) == '='
token.concat getc
end
elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
end
if token[0, 1] =~ /[A-Z]/
return Token(TkCONSTANT, token)
elsif token[token.size - 1, 1] =~ /[!?]/
return Token(TkFID, token)
else
return Token(TkIDENTIFIER, token)
end
end
# File irb/ruby-lex.rb, line 943
def identify_number
@lex_state = EXPR_END
if peek(0) == "0" && peek(1) !~ /[.eE]/
getc
case peek(0)
when /[xX]/
ch = getc
match = /[0-9a-fA-F_]/
when /[bB]/
ch = getc
match = /[01_]/
when /[oO]/
ch = getc
match = /[0-7_]/
when /[dD]/
ch = getc
match = /[0-9_]/
when /[0-7]/
match = /[0-7_]/
when /[89]/
RubyLex.fail SyntaxError, "Invalid octal digit"
else
return Token(TkINTEGER)
end
len0 = true
non_digit = false
while ch = getc
if match =~ ch
if ch == "_"
if non_digit
RubyLex.fail SyntaxError, "trailing `#{ch}' in number"
else
non_digit = ch
end
else
non_digit = false
len0 = false
end
else
ungetc
if len0
RubyLex.fail SyntaxError, "numeric literal without digits"
end
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
break
end
end
return Token(TkINTEGER)
end
type = TkINTEGER
allow_point = true
allow_e = true
non_digit = false
while ch = getc
case ch
when /[0-9]/
non_digit = false
when "_"
non_digit = ch
when allow_point && "."
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
if peek(0) !~ /[0-9]/
type = TkINTEGER
ungetc
break
end
allow_point = false
when allow_e && "e", allow_e && "E"
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
if peek(0) =~ /[+-]/
getc
end
allow_e = false
allow_point = false
non_digit = ch
else
if non_digit
RubyLex.fail SyntaxError, "trailing `#{non_digit}' in number"
end
ungetc
break
end
end
Token(type)
end
# File irb/ruby-lex.rb, line 925
def identify_quotation
ch = getc
if lt = PERCENT_LTYPE[ch]
ch = getc
elsif ch =~ /\W/
lt = "\""
else
RubyLex.fail SyntaxError, "unknown type of %string"
end
# if ch !~ /\W/
# ungetc
# next
# end
#@ltype = lt
@quoted = ch unless @quoted = PERCENT_PAREN[ch]
identify_string(lt, @quoted)
end
# File irb/ruby-lex.rb, line 1040
def identify_string(ltype, quoted = ltype)
@ltype = ltype
@quoted = quoted
subtype = nil
begin
nest = 0
while ch = getc
if @quoted == ch and nest == 0
break
elsif @ltype != "'" && ch == "#" && peek(0) == "{"
identify_string_dvar
elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
subtype = true
elsif ch == '\' and @ltype == "'" #'
case ch = getc
when "\\", "\n", "'"
else
ungetc
end
elsif ch == '\' #'
read_escape
end
if PERCENT_PAREN.values.include?(@quoted)
if PERCENT_PAREN[ch] == @quoted
nest += 1
elsif ch == @quoted
nest -= 1
end
end
end
if @ltype == "/"
while /[imxoesun]/ =~ peek(0)
getc
end
end
if subtype
Token(DLtype2Token[ltype])
else
Token(Ltype2Token[ltype])
end
ensure
@ltype = nil
@quoted = nil
@lex_state = EXPR_END
end
end
# File irb/ruby-lex.rb, line 1087
def identify_string_dvar
begin
getc
reserve_continue = @continue
reserve_ltype = @ltype
reserve_indent = @indent
reserve_indent_stack = @indent_stack
reserve_state = @lex_state
reserve_quoted = @quoted
@ltype = nil
@quoted = nil
@indent = 0
@indent_stack = []
@lex_state = EXPR_BEG
loop do
@continue = false
prompt
tk = token
if @ltype or @continue or @indent > 0
next
end
break if tk.kind_of?(TkRBRACE)
end
ensure
@continue = reserve_continue
@ltype = reserve_ltype
@indent = reserve_indent
@indent_stack = reserve_indent_stack
@lex_state = reserve_state
@quoted = reserve_quoted
end
end
# File irb/ruby-lex.rb, line 210
def initialize_input
@ltype = nil
@quoted = nil
@indent = 0
@indent_stack = []
@lex_state = EXPR_BEG
@space_seen = false
@here_header = false
@continue = false
prompt
@line = ""
@exp_line_no = @line_no
end
# File irb/ruby-lex.rb, line 261
def lex
until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
!@continue or
tk.nil?)
#p tk
#p @lex_state
#p self
end
line = get_readed
# print self.inspect
if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
nil
else
line
end
end
# File irb/ruby-lex.rb, line 338
def lex_init()
@OP = IRB::SLex.new
@OP.def_rules("\0", "\004", "\032") do |op, io|
Token(TkEND_OF_SCRIPT)
end
@OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
@space_seen = true
while getc =~ /[ \t\f\r\13]/; end
ungetc
Token(TkSPACE)
end
@OP.def_rule("#") do |op, io|
identify_comment
end
@OP.def_rule("=begin",
proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
|op, io|
@ltype = "="
until getc == "\n"; end
until peek_equal?("=end") && peek(4) =~ /\s/
until getc == "\n"; end
end
gets
@ltype = nil
Token(TkRD_COMMENT)
end
@OP.def_rule("\n") do |op, io|
print "\\n\n" if RubyLex.debug?
case @lex_state
when EXPR_BEG, EXPR_FNAME, EXPR_DOT
@continue = true
else
@continue = false
@lex_state = EXPR_BEG
until (@indent_stack.empty? ||
[TkLPAREN, TkLBRACK, TkLBRACE,
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
@indent_stack.pop
end
end
@here_header = false
@here_readed = []
Token(TkNL)
end
@OP.def_rules("*", "**",
"=", "==", "===",
"=~", "<=>",
"<", "<=",
">", ">=", ">>",
"!", "!=", "!~") do
|op, io|
case @lex_state
when EXPR_FNAME, EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_BEG
end
Token(op)
end
@OP.def_rules("<<") do
|op, io|
tk = nil
if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
(@lex_state != EXPR_ARG || @space_seen)
c = peek(0)
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
tk = identify_here_document
end
end
unless tk
tk = Token(op)
case @lex_state
when EXPR_FNAME, EXPR_DOT
@lex_state = EXPR_ARG
else
@lex_state = EXPR_BEG
end
end
tk
end
@OP.def_rules("'", '"') do
|op, io|
identify_string(op)
end
@OP.def_rules("`") do
|op, io|
if @lex_state == EXPR_FNAME
@lex_state = EXPR_END
Token(op)
else
identify_string(op)
end
end
@OP.def_rules('?') do
|op, io|
if @lex_state == EXPR_END
@lex_state = EXPR_BEG
Token(TkQUESTION)
else
ch = getc
if @lex_state == EXPR_ARG && ch =~ /\s/
ungetc
@lex_state = EXPR_BEG;
Token(TkQUESTION)
else
if (ch == '\')
read_escape
end
@lex_state = EXPR_END
Token(TkINTEGER)
end
end
end
@OP.def_rules("&", "&&", "|", "||") do
|op, io|
@lex_state = EXPR_BEG
Token(op)
end
@OP.def_rules("+=", "-=", "*=", "**=",
"&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
|op, io|
@lex_state = EXPR_BEG
op =~ /^(.*)=$/
Token(TkOPASGN, $1)
end
@OP.def_rule("+@", proc{|op, io| @lex_state == EXPR_FNAME}) do
|op, io|
@lex_state = EXPR_ARG
Token(op)
end
@OP.def_rule("-@", proc{|op, io| @lex_state == EXPR_FNAME}) do
|op, io|
@lex_state = EXPR_ARG
Token(op)
end
@OP.def_rules("+", "-") do
|op, io|
catch(:RET) do
if @lex_state == EXPR_ARG
if @space_seen and peek(0) =~ /[0-9]/
throw :RET, identify_number
else
@lex_state = EXPR_BEG
end
elsif @lex_state != EXPR_END and peek(0) =~ /[0-9]/
throw :RET, identify_number
else
@lex_state = EXPR_BEG
end
Token(op)
end
end
@OP.def_rule(".") do
|op, io|
@lex_state = EXPR_BEG
if peek(0) =~ /[0-9]/
ungetc
identify_number
else
# for "obj.if" etc.
@lex_state = EXPR_DOT
Token(TkDOT)
end
end
@OP.def_rules("..", "...") do
|op, io|
@lex_state = EXPR_BEG
Token(op)
end
lex_int2
end
# File irb/ruby-lex.rb, line 527
def lex_int2
@OP.def_rules("]", "}", ")") do
|op, io|
@lex_state = EXPR_END
@indent -= 1
@indent_stack.pop
Token(op)
end
@OP.def_rule(":") do
|op, io|
if @lex_state == EXPR_END || peek(0) =~ /\s/
@lex_state = EXPR_BEG
Token(TkCOLON)
else
@lex_state = EXPR_FNAME;
Token(TkSYMBEG)
end
end
@OP.def_rule("::") do
|op, io|
# p @lex_state.id2name, @space_seen
if @lex_state == EXPR_BEG or @lex_state == EXPR_ARG && @space_seen
@lex_state = EXPR_BEG
Token(TkCOLON3)
else
@lex_state = EXPR_DOT
Token(TkCOLON2)
end
end
@OP.def_rule("/") do
|op, io|
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
identify_string(op)
elsif peek(0) == '='
getc
@lex_state = EXPR_BEG
Token(TkOPASGN, "/") #/)
elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
identify_string(op)
else
@lex_state = EXPR_BEG
Token("/") #/)
end
end
@OP.def_rules("^") do
|op, io|
@lex_state = EXPR_BEG
Token("^")
end
# @OP.def_rules("^=") do
# @lex_state = EXPR_BEG
# Token(OP_ASGN, :^)
# end
@OP.def_rules(",") do
|op, io|
@lex_state = EXPR_BEG
Token(op)
end
@OP.def_rules(";") do
|op, io|
@lex_state = EXPR_BEG
until (@indent_stack.empty? ||
[TkLPAREN, TkLBRACK, TkLBRACE,
TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
@indent_stack.pop
end
Token(op)
end
@OP.def_rule("~") do
|op, io|
@lex_state = EXPR_BEG
Token("~")
end
@OP.def_rule("~@", proc{|op, io| @lex_state == EXPR_FNAME}) do
|op, io|
@lex_state = EXPR_BEG
Token("~")
end
@OP.def_rule("(") do
|op, io|
@indent += 1
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
@lex_state = EXPR_BEG
tk_c = TkfLPAREN
else
@lex_state = EXPR_BEG
tk_c = TkLPAREN
end
@indent_stack.push tk_c
Token(tk_c)
end
@OP.def_rule("[]", proc{|op, io| @lex_state == EXPR_FNAME}) do
|op, io|
@lex_state = EXPR_ARG
Token("[]")
end
@OP.def_rule("[]=", proc{|op, io| @lex_state == EXPR_FNAME}) do
|op, io|
@lex_state = EXPR_ARG
Token("[]=")
end
@OP.def_rule("[") do
|op, io|
@indent += 1
if @lex_state == EXPR_FNAME
tk_c = TkfLBRACK
else
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
tk_c = TkLBRACK
elsif @lex_state == EXPR_ARG && @space_seen
tk_c = TkLBRACK
else
tk_c = TkfLBRACK
end
@lex_state = EXPR_BEG
end
@indent_stack.push tk_c
Token(tk_c)
end
@OP.def_rule("{") do
|op, io|
@indent += 1
if @lex_state != EXPR_END && @lex_state != EXPR_ARG
tk_c = TkLBRACE
else
tk_c = TkfLBRACE
end
@lex_state = EXPR_BEG
@indent_stack.push tk_c
Token(tk_c)
end
@OP.def_rule('\') do
|op, io|
if getc == "\n"
@space_seen = true
@continue = true
Token(TkSPACE)
else
read_escape
Token("\\")
end
end
@OP.def_rule('%') do
|op, io|
if @lex_state == EXPR_BEG || @lex_state == EXPR_MID
identify_quotation
elsif peek(0) == '='
getc
Token(TkOPASGN, :%)
elsif @lex_state == EXPR_ARG and @space_seen and peek(0) !~ /\s/
identify_quotation
else
@lex_state = EXPR_BEG
Token("%") #))
end
end
@OP.def_rule('$') do
|op, io|
identify_gvar
end
@OP.def_rule('@') do
|op, io|
if peek(0) =~ /[\w@]/
ungetc
identify_identifier
else
Token("@")
end
end
# @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
# |op, io|
# @indent += 1
# @lex_state = EXPR_FNAME
# # @lex_state = EXPR_END
# # until @rests[0] == "\n" or @rests[0] == ";"
# # rests.shift
# # end
# end
@OP.def_rule("") do
|op, io|
printf "MATCH: start %s: %s\n", op, io.inspect if RubyLex.debug?
if peek(0) =~ /[0-9]/
t = identify_number
elsif peek(0) =~ /[^\x00-\/:-@\[-^`{-\x7F]/
t = identify_identifier
end
printf "MATCH: end %s: %s\n", op, io.inspect if RubyLex.debug?
t
end
p @OP if RubyLex.debug?
end
# File irb/ruby-lex.rb, line 179
def peek(i = 0)
while @rests.size <= i
return nil unless buf_input
end
@rests[i]
end
# File irb/ruby-lex.rb, line 164
def peek_equal?(str)
chrs = str.split(//)
until @rests.size >= chrs.size
return false unless buf_input
end
@rests[0, chrs.size] == chrs
end
# File irb/ruby-lex.rb, line 172
def peek_match?(regexp)
while @rests.empty?
return false unless buf_input
end
regexp =~ @rests.join("")
end
# File irb/ruby-lex.rb, line 204
def prompt
if @prompt
@prompt.call(@ltype, @indent, @continue, @line_no)
end
end
# File irb/ruby-lex.rb, line 1139
def read_escape
case ch = getc
when "\n", "\r", "\f"
when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
when /[0-7]/
ungetc ch
3.times do
case ch = getc
when /[0-7]/
when nil
break
else
ungetc
break
end
end
when "x"
2.times do
case ch = getc
when /[0-9a-fA-F]/
when nil
break
else
ungetc
break
end
end
when "M"
if (ch = getc) != '-'
ungetc
else
if (ch = getc) == "\\" #"
read_escape
end
end
when "C", "c" #, "^"
if ch == "C" and (ch = getc) != "-"
ungetc
elsif (ch = getc) == "\\" #"
read_escape
end
else
# other characters
end
end
io functions
# File irb/ruby-lex.rb, line 77
def set_input(io, p = nil, &block)
@io = io
if p.respond_to?(:call)
@input = p
elsif block_given?
@input = block
else
@input = Proc.new{@io.gets}
end
end
# File irb/ruby-lex.rb, line 195
def set_prompt(p = nil, &block)
p = block if block_given?
if p.respond_to?(:call)
@prompt = p
else
@prompt = Proc.new{print p}
end
end
# File irb/ruby-lex.rb, line 278
def token
# require "tracer"
# Tracer.on
@prev_seek = @seek
@prev_line_no = @line_no
@prev_char_no = @char_no
begin
begin
tk = @OP.match(self)
@space_seen = tk.kind_of?(TkSPACE)
rescue SyntaxError
raise if @exception_on_syntax_error
tk = TkError.new(@seek, @line_no, @char_no)
end
end while @skip_space and tk.kind_of?(TkSPACE)
if @readed_auto_clean_up
get_readed
end
# Tracer.off
tk
end
# File irb/ruby-lex.rb, line 143
def ungetc(c = nil)
if @here_readed.empty?
c2 = @readed.pop
else
c2 = @here_readed.pop
end
c = c2 unless c
@rests.unshift c #c =
@seek -= 1
if c == "\n"
@line_no -= 1
if idx = @readed.reverse.index("\n")
@char_no = @readed.size - idx
else
@char_no = @base_char_no + @readed.size
end
else
@char_no -= 1
end
end
Commenting is here to help enhance the documentation. For example, code samples, or clarification of the documentation.
If you have questions about Ruby or the documentation, please post to one of the Ruby mailing lists. You will get better, faster, help that way.
If you wish to post a correction of the docs, please do so, but also file bug report so that it can be corrected for the next release. Thank you.
If you want to help improve the Ruby documentation, please visit Documenting-ruby.org.