class Metasm::C::Parser
Dumper : objects => C source
Attributes
Public Class Methods
allowed arguments: ExeFormat, CPU, Preprocessor, Symbol (for the data model)
# File metasm/parse_c.rb, line 1185
# Builds a parser from an unordered argument list. Every argument is picked
# out by its class:
#   Symbol       - name of the data model method to apply (default :ilp32)
#   Preprocessor - used as the lexer (a new one is created when absent)
#   ExeFormat    - stored in @program
#   CPU          - supplies the endianness (falls back to @program.cpu)
# Without any CPU the endianness defaults to :big.
def initialize(*args)
  model    = args.grep(Symbol).first || :ilp32
  @lexer   = args.grep(Preprocessor).first || Preprocessor.new
  @program = args.grep(ExeFormat).first
  cpu      = args.grep(CPU).first
  cpu ||= @program.cpu if @program

  # install our #pragma handler, remembering the previous one so that
  # unknown pragmas can still be forwarded to it
  @prev_pragma_callback = @lexer.pragma_callback
  @lexer.pragma_callback = lambda { |tok| parse_pragma_callback(tok) }

  @toplevel   = Block.new(nil)
  @unreadtoks = []
  @endianness = cpu ? cpu.endianness : :big

  # sizes that do not depend on the data model; the model method called
  # below adds :short, :ptr, :int, :long and :longlong
  @typesize = {
    :void => 1,
    :__int8 => 1, :__int16 => 2, :__int32 => 4, :__int64 => 8,
    :char => 1, :float => 4, :double => 8, :longdouble => 12
  }
  send model

  # let the cpu, then the program, adjust the parser
  cpu.tune_cparser(self) if cpu
  @program.tune_cparser(self) if @program
end
creates a new CParser, parses all top-level statements
# File metasm/parse_c.rb, line 1162
# Convenience constructor: creates a parser with default settings and
# feeds it +text+. Returns whatever #parse returns.
def self.parse(text)
  parser = new
  parser.parse(text)
end
Public Instance Methods
Allocates an array of the given type. init is either the length of the array or an array of initial values.
# File metasm/parse_c.rb, line 3188
# Allocates an AllocCStruct wrapping a C array of +typename+ elements.
# +init+ is either the element count (Integer) or any object responding
# to #length; when it is a ruby Array its values are copied into the
# freshly allocated array.
def alloc_c_ary(typename, init=1)
  elem_type = find_c_type(typename)
  count = init.kind_of?(Integer) ? init : init.length
  ary = AllocCStruct.new(self, C::Array.new(elem_type, count))
  init.each_with_index { |value, idx| ary[idx] = value } if init.kind_of?(::Array)
  ary
end
Allocates a new AllocCStruct from a struct name or struct typedef name known to the current toplevel. Optionally populates the fields from the 'values' hash.
# File metasm/parse_c.rb, line 3171
# Allocates an AllocCStruct for the structure resolved by find_c_struct.
# When +values+ is given, every key/value pair is assigned to the
# corresponding field.
def alloc_c_struct(structname, values=nil)
  st = AllocCStruct.new(self, find_c_struct(structname))
  if values
    values.each { |field, value| st[field] = value }
  end
  st
end
Checks that two types are compatible (variable predeclaration, function argument, ...). Pass strict = false for function calls and assignments (e.g. char is compatible with int, but int is incompatible with char). Only outputs warnings.
# File metasm/parse_c.rb, line 1379
# Compares +oldtype+ with +newtype+ and prints a warning for each
# incompatibility. Does nothing unless $VERBOSE is set.
#   tok     - token used to build the warning/error messages
#   strict  - require identical base types and check pointed types
#   checked - types already visited (recursion guard, internal)
# A ParseError raised in a nested call bubbles up to the outermost call,
# which prints it instead of propagating it.
def check_compatible_type(tok, oldtype, newtype, strict = false, checked = [])
  return unless $VERBOSE

  oldtype = oldtype.untypedef
  newtype = newtype.untypedef
  # enums are compared as plain ints
  oldtype = BaseType.new(:int) if oldtype.kind_of?(Enum)
  newtype = BaseType.new(:int) if newtype.kind_of?(Enum)

  old_quals = oldtype.qualifier.to_a.uniq.length
  new_quals = newtype.qualifier.to_a.uniq.length
  puts tok.exception('type qualifier mismatch').message if old_quals > new_quals

  # avoid infinite recursion
  return if checked.include?(oldtype)
  checked += [oldtype]

  begin
    case newtype
    when Function
      raise tok, 'type error' unless oldtype.kind_of?(Function)
      # return types first, then the argument lists
      check_compatible_type(tok, oldtype.type, newtype.type, strict, checked)
      if oldtype.args and newtype.args
        if oldtype.args.length != newtype.args.length or oldtype.varargs != newtype.varargs
          raise tok, 'type error'
        end
        oldtype.args.zip(newtype.args) do |oa, na|
          # begin ; rescue ParseError: raise $!.message + "in parameter #{oa.name}" end
          check_compatible_type(tok, oa.type, na.type, strict, checked)
        end
      end

    when Pointer
      if oldtype.kind_of?(BaseType) and oldtype.integral?
        puts tok.exception('making pointer from integer without a cast').message
        return
      end
      raise tok, 'type error' unless oldtype.kind_of?(Pointer)
      points_to_void = lambda { |ptr|
        target = ptr.type.untypedef
        target.kind_of?(BaseType) and target.name == :void
      }
      new_void = points_to_void[newtype]
      old_void = points_to_void[oldtype]	# struct foo *f = NULL;
      if strict and not (new_void or old_void)
        check_compatible_type(tok, oldtype.type, newtype.type, strict, checked)
      end

    when Union
      raise tok, 'type error' unless oldtype.class == newtype.class
      if oldtype.members and newtype.members
        raise tok, 'bad member count' if oldtype.members.length != newtype.members.length
        oldtype.members.zip(newtype.members) do |om, nm|
          # raise tok if om.name and nm.name and om.name != nm.name # don't care
          check_compatible_type(tok, om.type, nm.type, strict, checked)
        end
      end

    when BaseType
      raise tok, 'type error' unless oldtype.kind_of?(BaseType)
      if strict
        if oldtype.name != newtype.name or oldtype.specifier != newtype.specifier
          raise tok, 'type error'
        end
      else
        old_size = @typesize[oldtype.name]
        new_size = @typesize[newtype.name]
        raise tok, 'type error' if new_size == 0 and old_size > 0
        puts tok.exception('type size mismatch, may lose bits').message if old_size > new_size
        puts tok.exception('sign mismatch').message if oldtype.specifier != newtype.specifier and new_size == old_size
      end
    end
  rescue ParseError => err
    raise err if checked.length != 1	# bubble up
    oname = (oldtype.to_s rescue oldtype.class.name)
    nname = (newtype.to_s rescue newtype.class.name)
    puts err.message + " incompatible type #{oname} to #{nname}"
  end
end
Checks that we are at the end of a statement, i.e. a ';' character (consumed) or a '}' (not consumed). Otherwise raises on the offending token, or on self at end of input.
# File metasm/parse_c.rb, line 1535
# Reads the next non-space token and checks that it terminates a
# statement: ';' is consumed, '}' is pushed back for the caller.
# Anything else raises '";" expected' on the token just read, or on self
# when the input is exhausted.
# NOTE: the +tok+ argument is overwritten by the token read here before it
# is ever used, exactly as in the original implementation.
def checkstatementend(tok=nil)
  tok = skipspaces
  terminator = (tok and tok.type == :punct and (tok.raw == ';' or tok.raw == '}'))
  raise tok || self, '";" expected' unless terminator
  unreadtok tok if tok.raw == '}'
end
“cast” a string to C::Array
# File metasm/parse_c.rb, line 3202
# Views +str+ (starting at +offset+) as a C array of +len+ elements of
# type +typename+; returns the AllocCStruct built over that string.
def decode_c_ary(typename, len, str, offset=0)
  elem_type = find_c_type(typename)
  ary_type = C::Array.new(elem_type, len)
  AllocCStruct.new(self, ary_type, str, offset)
end
Parses a given String as an AllocCStruct. offset is an optional offset from the start of the string. Modifications to the structure modify the underlying string.
# File metasm/parse_c.rb, line 3181
# Views +str+ (starting at +offset+) as the structure resolved by
# find_c_struct; returns the AllocCStruct built over that string.
def decode_c_struct(structname, str, offset=0)
  AllocCStruct.new(self, find_c_struct(structname), str, offset)
end
# File metasm/parse_c.rb, line 3237
# Decodes the value of C type +type+ stored in +str+ at offset +off+.
# +type+ may be a Variable, in which case its type is used.
# Struct/union and array types yield an AllocCStruct over the string;
# other types are decoded as an immediate using the parser endianness,
# sign-extended for signed integral types. A null pointer decodes to nil.
def decode_c_value(str, type, off=0)
  type = type.type if type.kind_of?(Variable)
  type = type.untypedef
  return AllocCStruct.new(self, type, str, off) if type.kind_of?(C::Union) or type.kind_of?(C::Array)

  nbytes = sizeof(type)
  val = Expression.decode_immediate(str, nbytes, @endianness, off)
  if type.integral? and type.signed?
    val = Expression.make_signed(val, nbytes*8)
  end
  (val == 0 and type.pointer?) ? nil : val
end
returns a string containing the C definition(s) of toplevel functions, with their dependencies
# File metasm/parse_c.rb, line 3311
# Dumps the toplevel symbols named in +funcnames+ through
# dump_definitions. The toplevel statement list is emptied for the
# duration of the dump and restored afterwards, even on error.
def dump_definition(*funcnames)
  saved_statements = @toplevel.statements
  @toplevel.statements = []
  targets = funcnames.map { |name| @toplevel.symbol[name] }
  dump_definitions(targets)
ensure
  @toplevel.statements = saved_statements
end
Returns a big string representing the definitions of all terms appearing in list, excluding those in exclude. Dependencies are included.
# File metasm/parse_c.rb, line 3288
# Renders the definitions of every object in +list+ together with the
# objects they depend on, leaving out anything listed in +exclude+.
# Returns the rendered lines joined with newlines.
def dump_definitions(list, exclude=[])
  todo_rndr = {}
  todo_deps = {}
  collect = lambda { |obj| todo_rndr[obj], todo_deps[obj] = obj.dump_def(@toplevel) }

  list.each(&collect)
  # c.toplevel.anonymous_enums.to_a.each { |t| todo_rndr[t], todo_deps[t] = t.dump_def(c.toplevel) }

  # recurse all dependencies: keep collecting until every dependency
  # mentioned so far has itself been rendered
  loop do
    pending = todo_deps.values.flatten - todo_deps.keys
    break if pending.empty?
    pending.each(&collect)
  end

  exclude.each do |obj|
    todo_deps.delete obj
    todo_rndr.delete obj
  end
  todo_deps.each_key { |obj| todo_deps[obj] -= exclude }

  all = @toplevel.struct.values + @toplevel.symbol.values
  all -= all.grep(::Integer)	# Enum values

  @toplevel.dump_reorder(all, todo_rndr, todo_deps)[0].join("\n")
end
convert (pack) a ruby value into a C buffer packs integers, converts Strings to their C pointer (using DynLdr)
# File metasm/parse_c.rb, line 3210
# Converts the ruby value +val+ into its C representation for +type+
# (+type+ may be a Variable, whose type is then used).
#   nil          -> 0
#   Integer      -> kept as is
#   String       -> address of the string (DynLdr.str_ptr)
#   Hash         -> a struct allocated through alloc_c_struct and filled
#                   from the hash; the AllocCStruct itself is returned
#   Proc         -> DynLdr callback
#   AllocCStruct -> address of its backing string plus its offset
# Integer results are packed with the parser endianness; anything else
# raises "TODO".
def encode_c_value(type, val)
  type = type.type if type.kind_of?(Variable)

  val =
    case val
    when nil then 0
    when ::Integer then val
    when ::String then DynLdr.str_ptr(val)
    when ::Hash
      type = type.pointed while type.pointer?
      raise "need a struct ptr for #{type} #{val.inspect}" unless type.kind_of?(Union)
      buf = alloc_c_struct(type, val)
      val.instance_variable_set('@rb2c', buf)	# GC trick
      buf
    when ::Proc then DynLdr.convert_rb2c(type, val)	# allocate a DynLdr callback
    when AllocCStruct then DynLdr.str_ptr(val.str) + val.stroff
    #when ::Float	# TODO
    else raise "TODO #{val.inspect}"
    end

  return val unless val.kind_of?(::Integer)
  Expression.encode_immediate(val, sizeof(type), @endianness)
end
# File metasm/parse_c.rb, line 1519
# True when no token is left: nothing has been pushed back with
# unreadtok and the lexer reports end of stream.
def eos?
  return false unless @unreadtoks.empty?
  @lexer.eos?
end
allows 'raise self'
# File metasm/parse_c.rb, line 1451
# Builds the exception used by 'raise self, msg' by delegating to the
# lexer's #exception.
def exception(msg='EOF unexpected')
  @lexer.exception(msg)
end
returns a big string containing all definitions from headers used in the source (including macros)
# File metasm/parse_c.rb, line 3257
# Parses the given source (arguments are forwarded to #parse) while
# tracing macros, then returns the string built by factorize_final.
# Raises 'eof expected' if the lexer still holds input after parsing.
def factorize(*a)
  factorize_init
  parse(*a)
  unless @lexer.eos?
    raise @lexer.readtok || self, 'eof expected'
  end
  factorize_final
end
# File metasm/parse_c.rb, line 3268
# Builds the factorized header: the traced macros, a blank line, then the
# definitions required by the parsed source. Objects whose backtrace holds
# no file name starting with '<' are treated as user-defined and are
# excluded from the dump themselves (only their dependencies are kept).
# Clears the toplevel statement list as a side effect.
def factorize_final
  # now find all types/defs not coming from the standard headers
  everything = @toplevel.struct.values + @toplevel.symbol.values
  everything -= everything.grep(::Integer)	# Enum values

  # list of definitions of user-defined objects
  userdefined = everything.select do |obj|
    files = obj.backtrace.backtrace.grep(::String)
    files.grep(/^</).empty?
  end

  @toplevel.statements.clear	# don't want all Declarations

  # a macro is fine too
  macros = @lexer.dump_macros(@lexer.traced_macros, false)
  macros + "\n\n" + dump_definitions(userdefined, userdefined)
end
# File metasm/parse_c.rb, line 3264
# Resets the lexer's list of traced macros to a new empty array.
def factorize_init
  @lexer.traced_macros = Array.new
end
Finds a Struct/Union object from a struct name or typedef name. Raises if it cannot find it.
# File metasm/parse_c.rb, line 3133
# Resolves +structname+ to a C::Union (struct or union) object.
# Accepts a Symbol/String naming a struct of the toplevel, or naming a
# toplevel symbol whose (untypedef'd, pointer-stripped) type is a
# struct/union, or a C::Union object, which is returned unchanged.
# Raises "unknown struct ..." in every other case.
def find_c_struct(structname)
  structname = structname.to_s if structname.kind_of?(::Symbol)
  unknown = lambda { raise "unknown struct #{structname.inspect}" }

  struct = nil
  if structname.kind_of?(::String)
    struct = @toplevel.struct[structname]
    unless struct
      # not a struct tag: try a typedef / variable of that name
      sym = @toplevel.symbol[structname]
      unknown.call unless sym
      struct = sym.type.untypedef
      struct = struct.pointed while struct.pointer?
      unknown.call unless struct.kind_of?(C::Union)
    end
  end
  struct = structname if structname.kind_of?(C::Union)

  unknown.call unless struct.kind_of?(C::Union)
  struct
end
find a C::Type (struct/union/typedef/basetype) from a string
# File metasm/parse_c.rb, line 3148
# Resolves +typename+ to a C::Type.
# A Symbol/String is looked up among the toplevel structs, then among the
# toplevel symbols (using the symbol's untypedef'd type); failing that the
# string is fed to the lexer and parsed as a C type declaration.
# A C::Type object is returned unchanged.
# Raises "unknown type ..." when nothing matches.
def find_c_type(typename)
  typename = typename.to_s if typename.kind_of?(::Symbol)

  type = nil
  if typename.kind_of?(::String)
    type = @toplevel.struct[typename]
    unless type
      sym = @toplevel.symbol[typename]
      if sym
        type = sym.type.untypedef
      else
        begin
          lexer.feed(typename)
          blk = C::Block.new(@toplevel)
          var = Variable.parse_type(self, blk)
          var.parse_declarator(self, blk)
          type = var.type
        rescue
          # best effort: a failed parse leaves type unset, which is
          # reported as an unknown type below
        end
      end
    end
  end
  type = typename if typename.kind_of?(C::Type)

  raise "unknown type #{typename.inspect}" unless type.kind_of?(C::Type)
  type
end
# File metasm/parse_c.rb, line 1204
# Data model with 2-byte int and pointers and 4-byte long (and long long).
# Updates @typesize in place and returns it.
def ilp16
  @typesize.merge!({
    :short => 2, :ptr => 2,
    :int => 2, :long => 4, :longlong => 4
  })
end
# File metasm/parse_c.rb, line 1213
# Data model with 4-byte int, long and pointers, 8-byte long long.
# Updates @typesize in place and returns it.
def ilp32
  @typesize.merge!({
    :short => 2, :ptr => 4,
    :int => 4, :long => 4, :longlong => 8
  })
end
# File metasm/parse_c.rb, line 1226
# Data model with 8-byte int, long, long long and pointers.
# Updates @typesize in place and returns it.
def ilp64
  @typesize.merge!({
    :short => 2, :ptr => 8,
    :int => 8, :long => 8, :longlong => 8
  })
end
# File metasm/parse_c.rb, line 1218
# Data model with 4-byte int and long, 8-byte long long and pointers.
# Updates @typesize in place and returns it.
def llp64
  @typesize.merge!({
    :short => 2, :ptr => 8,
    :int => 4, :long => 4, :longlong => 8
  })
end
# File metasm/parse_c.rb, line 1209
# Data model with 2-byte int, 4-byte long and pointers, 8-byte long long.
# Updates @typesize in place and returns it.
def lp32
  @typesize.merge!({
    :short => 2, :ptr => 4,
    :int => 2, :long => 4, :longlong => 8
  })
end
# File metasm/parse_c.rb, line 1222
# Data model with 4-byte int, 8-byte long, long long and pointers.
# Updates @typesize in place and returns it.
def lp64
  @typesize.merge!({
    :short => 2, :ptr => 8,
    :int => 4, :long => 8, :longlong => 8
  })
end
check if a macro definition has a numeric value returns this value or nil
# File metasm/parse_c.rb, line 1826
# Returns the numeric value of the preprocessor macro named +m+, or nil
# when the macro is unknown, takes arguments, was not defined from a file
# or header, or does not reduce to a Numeric.
# Raises if unparsed tokens are pending, since the macro name is fed to
# the lexer and parsed as an expression. Leftover tokens are discarded.
def macro_numeric(m)
  d = @lexer.definition[m]
  return unless d.kind_of?(Preprocessor::Macro)
  return if d.args or d.varargs

  # filter metasm-defined vars (eg __PE__ / _M_IX86)
  return unless d.name
  bt = d.name.backtrace
  return unless bt
  return if bt[0][0] != ?" and bt[0][0] != ?<

  raise 'cannot macro_numeric with unparsed data' unless eos?
  @lexer.feed m
  e = CExpression.parse(self, Block.new(@toplevel))
  if e and eos?
    v = e.reduce(self)
    return v if v.kind_of?(::Numeric)
  end

  # not numeric: drain whatever the macro expanded to
  readtok until eos?
  nil
rescue ParseError
  readtok until eos?
  nil
end
Returns all defined numeric constants with their value, whether macros or enum members. For enum members, also returns the enum name.
# File metasm/parse_c.rb, line 1846
# Lists every numeric constant known to the parser as an array of
#   [name, value]             for macros and numeric toplevel symbols
#   [name, value, enum_name]  for members of named enums
# Numeric toplevel symbols already reported as enum members are skipped.
def numeric_constants
  ret = []

  # macros
  @lexer.definition.each_key do |name|
    value = macro_numeric(name)
    ret << [name, value] if value
  end

  # enums
  seen_enum = {}
  @toplevel.struct.each do |enum_name, enum|
    next unless enum.kind_of?(Enum)
    enum.members.each do |member, value|
      ret << [member, value, enum_name]
      seen_enum[member] = true
    end
  end

  @toplevel.symbol.each do |name, value|
    next unless value.kind_of?(::Numeric)
    ret << [name, value] unless seen_enum[name]
  end

  ret
end
parses the current lexer content (or the text arg) for toplevel definitions
# File metasm/parse_c.rb, line 1167
# Parses toplevel definitions and statements until the lexer is exhausted.
# When +text+ is given it is first fed to the lexer, tagged with
# +filename+ and +lineno+. Raises 'invalid definition' if parsing stops
# before the end of the input. Returns self.
def parse(text=nil, filename='<unk>', lineno=1)
  @lexer.feed text, filename, lineno if text

  until @lexer.eos?
    break unless parse_definition(@toplevel) or parse_toplevel_statement(@toplevel)
  end

  unless @lexer.eos?
    raise @lexer.readtok || self, 'invalid definition'
  end
  sanity_checks
  self
end
Parses a variable/function definition, declaration or initialization. Populates scope.symbol and scope.struct. Raises on redefinitions. Returns false if no definition is found.
# File metasm/parse_c.rb, line 1601
# Parses one declaration (possibly declaring several comma-separated
# names) into +scope+: variables, typedefs, function prototypes and
# function definitions with their body.
# Returns false when no type can be parsed at the current position, true
# otherwise. Raises on redefinitions and syntax errors.
def parse_definition(scope)
  basetype = Variable.parse_type(self, scope, true)
  return false unless basetype

  # check struct predeclaration
  tok = skipspaces
  if tok and tok.type == :punct and tok.raw == ';' and basetype.type and
      (basetype.type.kind_of?(Union) or basetype.type.kind_of?(Enum))
    return true
  end
  unreadtok tok

  nofunc = false	# set once a ',' was seen: a function body is no longer allowed
  loop do
    var = basetype.dup
    var.parse_declarator(self, scope)

    raise var.backtrace unless var.name	# barrel roll

    prev = scope.symbol[var.name]
    if prev
      if prev.kind_of?(TypeDef) and var.storage == :typedef
        check_compatible_type(var.backtrace, prev.type, var.type, true)
        # windows.h redefines many typedefs with the same definition
        puts "redefining typedef #{var.name}" if $VERBOSE
        var = prev
      elsif not prev.kind_of?(Variable) or
          prev.initializer or
          (prev.storage != :extern and prev.storage != var.storage) or
          (scope != @toplevel and prev.storage != :static)
        if prev.kind_of?(::Integer)	# enum value
          enums = scope.struct.values.grep(Enum) + scope.anonymous_enums.to_a
          prev = enums.find { |e| e.members.index(prev) }
        end
        raise var.backtrace, "redefinition, previous is #{prev.backtrace.exception(nil).message rescue :unknown}"
      else
        check_compatible_type(var.backtrace, prev.type, var.type, true)
        (var.attributes ||= []).concat prev.attributes if prev.attributes
      end
    elsif var.storage == :typedef
      attrs = var.attributes
      var = TypeDef.new(var.name, var.type, var.backtrace)
      var.attributes = attrs if attrs
    end
    scope.statements << Declaration.new(var) unless var.kind_of?(TypeDef)

    tok = skipspaces
    unless tok and (tok.type == :punct or %w[asm __asm __asm__].include?(tok.raw))
      raise tok || self, 'punctuation expected'
    end

    case tok.raw
    when '{'
      # function body
      raise tok if nofunc or not var.kind_of?(Variable) or not var.type.kind_of?(Function)
      scope.symbol[var.name] = var
      body = var.initializer = Block.new(scope)
      var.type.args ||= []
      var.type.args.each do |arg|
        # put func parameters in func body scope
        # arg redefinition is checked in parse_declarator
        if not arg.name
          puts "unnamed argument in definition of #{var.name}" if $DEBUG
          next	# should raise to be compliant
        end
        body.symbol[arg.name] = arg	# XXX will need special check in stack allocator
      end

      loop do
        tok = skipspaces
        raise tok || self, var.backtrace.exception('"}" expected for end of function') unless tok
        break if tok.type == :punct and tok.raw == '}'
        unreadtok tok
        unless parse_definition(body)
          body.statements << parse_statement(body, [var.type.type])
        end
      end

      last_stmt_ok = (body.statements.last.kind_of?(Return) or body.statements.last.kind_of?(Asm))
      if $VERBOSE and not last_stmt_ok
        rettype = var.type.type.untypedef
        puts tok.exception('missing function return value').message if not rettype.kind_of?(BaseType) or var.type.type.untypedef.name != :void
      end
      break

    when 'asm', '__asm', '__asm__'
      # GCC function redirection
      # void foo(void) __asm__("bar"); => when code uses 'foo', silently redirect to 'bar' instead
      raise tok if nofunc or not var.kind_of?(Variable) or not var.type.kind_of?(Function)
      # most of the time, 'bar' is not defined anywhere, so we support it only
      # to allow parsing of headers using it, hoping noone will actually use them
      unused = Asm.parse(self, scope)
      puts "unsupported gcc-style __asm__ function redirect #{var.name.inspect} => #{unused.body.inspect}" if $VERBOSE
      break

    when '='
      # variable initialization
      raise tok, '"{" or ";" expected' if var.type.kind_of?(Function)
      raise tok, 'cannot initialize extern variable' if var.storage == :extern
      scope.symbol[var.name] = var	# allow initializer to reference var, eg 'void *ptr = &ptr;'
      var.initializer = var.type.parse_initializer(self, scope)
      if var.initializer.kind_of?(CExpression) and (scope == @toplevel or var.storage == :static)
        raise tok, "initializer for static #{var.name} is not constant" unless var.initializer.constant?
      end

      # true if expression e reads the value of variable v
      reference_value = lambda { |e, v|
        found = false
        case e
        when Variable; found = true if e == v
        when CExpression; e.walk { |ee| found ||= reference_value[ee, v] } if e.op != :& or e.lexpr
        end
        found
      }
      raise tok, "initializer for #{var.name} is not constant (selfreference)" if reference_value[var.initializer, var]

      tok = skipspaces
      raise tok || self, '"," or ";" expected' unless tok and tok.type == :punct

    else
      scope.symbol[var.name] = var
    end

    case tok.raw
    when ','
      nofunc = true
    when ';'
      break
    when '}'
      unreadtok(tok)
      break
    else
      raise tok, '";" or "," expected'
    end
  end
  true
end
parses a C file
# File metasm/parse_c.rb, line 1176
# Reads +file+ from disk and parses its content, using the path as the
# file name passed to #parse.
def parse_file(file)
  source = File.read(file)
  parse(source, file)
end
# File metasm/parse_c.rb, line 1231
# Handler installed as the lexer's #pragma callback. +otok+ is the token
# holding the pragma name; the pragma arguments are read from the lexer.
# Handled pragmas:
#   pack                 - pack(), pack(N), pack(push[, N]), pack(pop[, N])
#   warning              - forwarded to the previous callback when $DEBUG,
#                          otherwise discarded up to the end of line
#   prepare_visualstudio - calls prepare_visualstudio
#   prepare_gcc          - calls prepare_gcc
#   data_model           - calls the named data model method (lp32, ilp64, ...)
# Anything else is forwarded to the previously installed callback.
# Raises on malformed arguments. A data_model pragma with no argument left
# in the lexer raises the 'invalid data model' error on the pragma token
# (it used to fail with a NoMethodError on nil).
def parse_pragma_callback(otok)
  case otok.raw
  when 'pack'
    # lp = '(', then an optional v1, an optional ', v2', then rp = ')'
    nil while lp = @lexer.readtok and lp.type == :space
    nil while rp = @lexer.readtok and rp.type == :space
    if not rp or rp.type != :punct or rp.raw != ')'
      v1 = rp
      nil while rp = @lexer.readtok and rp.type == :space
    end
    if rp and rp.type == :punct and rp.raw == ','
      nil while v2 = @lexer.readtok and v2.type == :space
      nil while rp = @lexer.readtok and rp.type == :space
    end
    raise otok if not rp or lp.type != :punct or rp.type != :punct or lp.raw != '(' or rp.raw != ')'
    raise otok if (v1 and v1.type != :string) or (v2 and (v2.type != :string or v2.raw =~ /[^\d]/))

    if not v1
      @pragma_pack = nil
    elsif v1.raw == 'push'
      @pragma_pack_stack ||= []
      @pragma_pack_stack << pragma_pack
      @pragma_pack = v2.raw.to_i if v2
      raise v2, 'bad pack value' if pragma_pack == 0
    elsif v1.raw == 'pop'
      @pragma_pack_stack ||= []
      raise v1, 'pack stack empty' if @pragma_pack_stack.empty?
      @pragma_pack = @pragma_pack_stack.pop
      @pragma_pack = v2.raw.to_i if v2 and v2.raw	# #pragma pack(pop, 4) => pop stack, but use 4 as pack value (imho)
      raise v2, 'bad pack value' if @pragma_pack == 0
    elsif v1.raw =~ /^\d+$/
      raise v2, '2nd arg unexpected' if v2
      @pragma_pack = v1.raw.to_i
      raise v1, 'bad pack value' if @pragma_pack == 0
    else
      raise otok
    end
    # the caller checks for :eol

  when 'warning'
    if $DEBUG
      @prev_pragma_callback[otok]
    else
      # silent discard
      nil while tok = @lexer.readtok_nopp and tok.type != :eol
      @lexer.unreadtok tok
    end

  when 'prepare_visualstudio'
    prepare_visualstudio

  when 'prepare_gcc'
    prepare_gcc

  when 'data_model'
    # XXX use carefully, should be the very first thing parsed
    nil while lp = @lexer.readtok and lp.type == :space
    # lp is nil when the lexer has nothing left after the pragma name
    if not lp or lp.type != :string or lp.raw !~ /^s?[il]?lp(16|32|64)$/ or not respond_to? lp.raw
      raise lp || otok, "invalid data model (use lp32/lp64/llp64/ilp64)"
    else
      send lp.raw
    end

  else
    @prev_pragma_callback[otok]
  end
end
returns a statement or raise
# File metasm/parse_c.rb, line 1732
# Parses one statement in +scope+ and returns the corresponding object
# (Block, If, While, Return, Label, CExpression, ...); raises on error.
# +nest+ describes the enclosing context: its first element is the return
# type of the current function, and it is searched for the :switch, :loop
# and :expression markers.
def parse_statement(scope, nest)
  raise self, 'statement expected' if not tok = skipspaces

  # parses an expression statement starting at the current position;
  # 'start' is the token used for error reporting
  expression_statement = lambda { |start|
    raise start, 'expr expected' if not expr = CExpression.parse(self, scope)
    checkstatementend(start)

    if $VERBOSE and not nest.include?(:expression) and
        (expr.op or not expr.type.untypedef.kind_of?(BaseType) or expr.type.untypedef.name != :void) and
        CExpression.constant?(expr)
      puts start.exception("statement with no effect : #{expr}").message
    end
    expr
  }

  if tok.type == :punct and tok.raw == '{'
    # compound statement
    body = Block.new scope
    loop do
      tok = skipspaces
      raise tok || self, '"}" expected' unless tok
      break if tok.type == :punct and tok.raw == '}'
      unreadtok tok
      unless parse_definition(body)
        body.statements << parse_statement(body, nest)
      end
    end
    return body
  elsif tok.type == :punct and tok.raw == ';'
    # empty statement
    return Block.new(scope)
  elsif tok.type != :string
    unreadtok tok
    return expression_statement[tok]
  end

  case tok.raw
  when 'if'
    If.parse self, scope, nest
  when 'while'
    While.parse self, scope, nest
  when 'do'
    DoWhile.parse self, scope, nest
  when 'for'
    For.parse self, scope, nest
  when 'switch'
    Switch.parse self, scope, nest
  when 'goto'
    tok = skipspaces
    raise tok || self, 'label expected' unless tok and tok.type == :string
    name = tok.raw
    checkstatementend(tok)
    Goto.new name
  when 'return'
    expr = CExpression.parse(self, scope)	# nil allowed
    raise tok || self, "cannot return #{expr} in function returning void" if expr and nest[0].kind_of?(Type) and nest[0].void?
    is_ptr = is_int = nil
    if expr
      is_ptr = nest[0].pointer?
      is_int = nest[0].integral?
    end
    reduced = expr.reduce(self) if is_ptr or is_int
    # a constant integer (or a null pointer constant) needs no type check
    if (not is_ptr and not is_int) or (is_int and not reduced.kind_of?(::Integer)) or (is_ptr and reduced != 0)
      check_compatible_type(tok, (expr ? expr.type : BaseType.new(:void)), nest[0])
    end
    checkstatementend(tok)
    Return.new expr
  when 'case'
    raise tok, 'case out of switch' unless nest.include?(:switch)
    Case.parse self, scope, nest
  when 'default'
    tok = skipspaces
    raise tok || self, '":" expected' unless tok and tok.type == :punct and tok.raw == ':'
    raise tok, 'case out of switch' unless nest.include?(:switch)
    Case.new 'default', nil, parse_statement(scope, nest)
  when 'continue'
    checkstatementend(tok)
    raise tok, 'continue out of loop' unless nest.include?(:loop)
    Continue.new
  when 'break'
    checkstatementend(tok)
    raise tok, 'break out of loop' if not nest.include?(:loop) and not nest.include?(:switch)
    Break.new
  when 'asm', '__asm', '__asm__'
    Asm.parse self, scope
  else
    ntok = skipspaces
    if ntok and ntok.type == :punct and ntok.raw == ':'
      # label: the labelled statement is optional
      begin
        st = parse_statement(scope, nest)
      rescue ParseError
        puts "label without statement, #{$!.message}" if $VERBOSE
      end
      Label.new tok.raw, st
    else
      unreadtok ntok
      unreadtok tok
      expression_statement[tok]
    end
  end
end
Parses a toplevel statement; returns nil if none is found. Toplevel statements are ';', '{}' and 'asm <..>'.
# File metasm/parse_c.rb, line 1719
# Parses one toplevel statement: a lone ';', an empty '{' '}' pair, or an
# asm statement (appended to scope.statements). Returns true when one was
# parsed and nil otherwise.
# NOTE(review): a token that matches none of these forms has been read and
# is not pushed back - confirm that callers expect this.
def parse_toplevel_statement(scope)
  tok = skipspaces
  return nil unless tok

  if tok.type == :punct
    case tok.raw
    when ';'
      true
    when '{'
      tok = skipspaces
      closed = (tok and tok.type == :punct and tok.raw == '}')
      raise tok || self, '"}" expected' unless closed
      true
    end
  elsif tok.type == :string and %w[asm __asm __asm__].include?(tok.raw)
    scope.statements << Asm.parse(self, scope)
    true
  end
end
# File metasm/compile_c.rb, line 13
# Precompiles the toplevel block with a new Compiler bound to this parser
# and to @program. Returns self.
def precompile
  compiler = Compiler.new(self, @program)
  @toplevel.precompile(compiler)
  self
end
# Sets up the lexer to parse GCC-flavoured headers:
#  - weakly defines __GNUC__ (2), __STDC__, and the __const / __signed /
#    __signed__ / __volatile keyword aliases
#  - unless already defined, defines __builtin_constant_p as a macro
#    taking one argument and expanding to 0
#  - calls nodefine_strong on 'alloca'
#  - registers built-in replacements (hooked_include) for stddef.h,
#    stdarg.h and limits.h; the UINT_MAX and ULONG_MAX values in limits.h
#    are computed from @typesize[:int] and @typesize[:long] at the time
#    this method runs
# NOTE(review): the three heredocs below have lost their line breaks in
# this dump; the code is left untouched rather than guessing the layout.
# File metasm/parse_c.rb, line 1302 def prepare_gcc @lexer.define_weak('__GNUC__', 2) # otherwise __attribute__ is defined to void.. @lexer.define_weak('__STDC__') @lexer.define_weak('__const', 'const') @lexer.define_weak('__signed', 'signed') @lexer.define_weak('__signed__', 'signed') @lexer.define_weak('__volatile', 'volatile') if not @lexer.definition['__builtin_constant_p'] # magic macro to check if its arg is an immediate value @lexer.define_weak('__builtin_constant_p', '0') @lexer.definition['__builtin_constant_p'].args = [Preprocessor::Token.new([])] end @lexer.nodefine_strong('alloca') # TODO __builtin_alloca @lexer.hooked_include['stddef.h'] = <<EOH /* simplified, define all at first invocation. may break things... */ #undef __need_ptrdiff_t #undef __need_size_t #undef __need_wint_t #undef __need_wchar_t #undef __need_NULL #undef NULL #if !defined (_STDDEF_H) #define _STDDEF_H #define __PTRDIFF_TYPE__ long int typedef __PTRDIFF_TYPE__ ptrdiff_t; #define __SIZE_TYPE__ long unsigned int typedef __SIZE_TYPE__ size_t; #define __WINT_TYPE__ unsigned int typedef __WINT_TYPE__ wint_t; #define __WCHAR_TYPE__ int typedef __WCHAR_TYPE__ wchar_t; #define NULL 0 #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) #endif EOH # TODO va_args @lexer.hooked_include['stdarg.h'] = <<EOH // TODO typedef void* __gnuc_va_list; /* typedef void* va_list; #define va_start(v, l) #define va_end(v) #define va_arg(v, l) #define va_copy(d, s) */ EOH @lexer.hooked_include['limits.h'] = <<EOH #define CHAR_BIT 8 #define SCHAR_MIN (-128) #define SCHAR_MAX 127 #define UCHAR_MAX 255 #ifdef __CHAR_UNSIGNED__ # define CHAR_MIN 0 # define CHAR_MAX UCHAR_MAX #else # define CHAR_MIN SCHAR_MIN # define CHAR_MAX SCHAR_MAX #endif #define UINT_MAX #{(1 << (8*@typesize[:int]))-1}U #define INT_MAX (UINT_MAX >> 1) #define INT_MIN (-INT_MAX - 1) #define ULONG_MAX #{(1 << (8*@typesize[:long]))-1}UL #define LONG_MAX (ULONG_MAX >> 1L) #define LONG_MIN (-LONG_MAX - 1L) EOH end
# File metasm/parse_c.rb, line 1289
# Weakly defines the macros needed to parse Visual Studio headers.
# Each entry is the argument list of one @lexer.define_weak call, applied
# in order; the result of the last call is returned.
def prepare_visualstudio
  weak_defines = [
    ['_WIN32'],
    ['_WIN32_WINNT', 0x500],
    ['_INTEGRAL_MAX_BITS', 64],
    ['__w64'],
    ['_cdecl', '__cdecl'],	# typo ? seen in winreg.h
    ['_fastcall', '__fastcall'],	# typo ? seen in ntddk.h
    ['_MSC_VER', 1300],	# handle '#pragma once' and _declspec(noreturn)
    ['__forceinline', '__inline'],
    ['__ptr32'],	# needed with msc_ver 1300, don't understand their use
    ['__ptr64']
  ]
  weak_defines.map { |args| @lexer.define_weak(*args) }.last
end
Reads a token from self.lexer. Concatenates adjacent strings, merges runs of spaces/eol into a single ' ', handles wchar strings, and allows $, @ and _ in :string tokens.
# File metasm/parse_c.rb, line 1477
# Returns the next token, or nil at end of input.
# A token pushed back with unreadtok is returned as is. Otherwise a token
# is read through readtok_longstr and normalised:
#  - a run of :space/:eol tokens becomes a single :space token ' '
#  - consecutive :quoted tokens (possibly separated by spaces/eol) are
#    merged into one; the result is wide if any part is
#  - '_', '@', '$' and :string tokens that follow each other are glued
#    into a single :string token
# The returned token is a copy whenever it is modified. Its raw (and
# value) strings are duplicated before being appended to: a plain #dup of
# the token is shallow, so appending used to modify the strings still
# referenced by the lexer's own token.
def readtok
  t = @unreadtoks.pop
  return t if t
  return if not t = readtok_longstr

  case t.type
  when :space, :eol
    # merge consecutive :space/:eol
    t = t.dup
    t.type = :space
    t.raw = ' '
    nil while nt = @lexer.readtok and (nt.type == :eol or nt.type == :space)
    @lexer.unreadtok nt

  when :quoted
    # merge consecutive :quoted
    t = t.dup
    t.raw = t.raw.dup
    # the value writer is not visible from here, hence the respond_to? guard
    t.value = t.value.dup if t.respond_to?(:value=) and t.value.kind_of?(::String)
    while nt = readtok_longstr
      case nt.type
      when :quoted
        if t.raw[0] == ?" and nt.raw[0, 2] == 'L"'
          # ensure wide prefix is set
          t.raw[0, 0] = 'L'
        end
        t.raw << ' ' << nt.raw
        t.value << nt.value
      when :space, :eol
        # skipped between two string literals
      else
        break
      end
    end
    @lexer.unreadtok nt

  else
    # true for tokens that can be part of an identifier
    gluable = lambda { |tk|
      (tk.type == :punct and (tk.raw == '_' or tk.raw == '@' or tk.raw == '$')) or tk.type == :string
    }
    if gluable[t]
      t = t.dup
      t.type = :string
      t.raw = t.raw.dup
      while nt = @lexer.readtok and gluable[nt]
        t.raw << nt.raw
      end
      @lexer.unreadtok nt
    end
  end
  t
end
C sanity checks
# File metasm/parse_c.rb, line 1371
# Placeholder for post-parse sanity checks; currently does nothing and
# returns nil whether or not $VERBOSE is set.
def sanity_checks
  return unless $VERBOSE
  # TODO
end
returns the size of a type in bytes
# File metasm/parse_c.rb, line 1541
# Returns the size in bytes of a variable/expression or of a type.
# Call forms: sizeof(var), sizeof(type), sizeof(var, type).
#  - arrays: element size times the length. The length may be an Integer
#    or a CExpression reducing to an Integer; when absent it is taken from
#    the variable initializer (string initializers count the trailing NUL)
#  - pointers: @typesize[:ptr], except for a string literal expression
#    (sizeof("lolz") => 5)
#  - functions: 1, as gcc does
#  - base types and enums: looked up in @typesize
#  - structs: offset of the last member plus its size, rounded up to the
#    struct alignment (no rounding with the 'sizeof_packed' attribute)
#  - unions: size of the largest member
#  - typedefs: size of the aliased type
# Raises when the size of an array or structure cannot be determined.
def sizeof(var, type=nil)
  var, type = nil, var if var.kind_of? Type and not type
  type ||= var.type
  # XXX double-check class apparition order ('when' checks inheritance)
  case type
  when Array
    case type.length
    when nil
      if var.kind_of? CExpression and not var.lexpr and not var.op and var.rexpr.kind_of? Variable
        var = var.rexpr
      end
      raise self, 'unknown array size' if not var.kind_of? Variable or not var.initializer
      init = var.initializer
      init = init.rexpr if init.kind_of? C::CExpression and not init.op and init.rexpr.kind_of? ::String
      case init
      when ::String
        sizeof(nil, type.type) * (init.length + 1)
      when ::Array
        v = init.compact.first
        v ? (sizeof(nil, type.type) * init.length) : 0
      else
        sizeof(init)
      end
    when ::Integer
      type.length * sizeof(type.type)
    when CExpression
      len = type.length.reduce(self)
      raise self, 'unknown array size' if not len.kind_of? ::Integer
      # multiply by the element size: asking for the size of the array
      # type itself here would recurse forever
      len * sizeof(type.type)
    else
      raise self, 'unknown array size'
    end
  when Pointer
    if var.kind_of? CExpression and not var.op and var.rexpr.kind_of? ::String
      # sizeof("lolz") => 5
      sizeof(nil, type.type) * (var.rexpr.length + 1)
    else
      @typesize[:ptr]
    end
  when Function
    # raise
    1	# gcc
  when BaseType
    @typesize[type.name]
  when Enum
    @typesize[:int]
  when Struct
    raise self, "unknown structure size #{type.name}" if not type.members
    al = type.align(self)
    al = 1 if (var.kind_of?(Attributes) and var.has_attribute('sizeof_packed')) or type.has_attribute('sizeof_packed')
    lm = type.members.last
    # end of the last member, rounded up to a multiple of the alignment
    lm ? (type.offsetof(self, lm) + sizeof(lm) + al - 1) / al * al : 0
  when Union
    raise self, "unknown structure size #{type.name}" if not type.members
    type.members.map { |m| sizeof(m) }.max || 0
  when TypeDef
    sizeof(var, type.type)
  end
end
returns the next non-space/non-eol token
# File metasm/parse_c.rb, line 1528
# Reads tokens until one is not of type :space and returns it; returns
# nil when the input ends first.
def skipspaces
  loop do
    t = readtok
    return t unless t and t.type == :space
  end
end
# File metasm/parse_c.rb, line 3319 def to_s @toplevel.dump(nil)[0].join("\n") end
# File metasm/parse_c.rb, line 1523
# Pushes +tok+ back so that the next readtok returns it; a nil/false
# token is ignored.
def unreadtok(tok)
  return unless tok
  @unreadtoks << tok
end
Private Instance Methods
Reads a token from the lexer, converting 'L"foo"' to a single :quoted token.
# File metasm/parse_c.rb, line 1456
# Reads one token from the lexer with two special cases:
#  - the identifier L directly followed by a double-quoted string becomes
#    that :quoted token, with 'L' prepended to its raw text
#  - two consecutive '/' punctuation tokens are treated as a comment
#    start: tokens are skipped up to the :eol token, which is returned
#    (nil at end of input)
# In any other case a looked-ahead token is pushed back to the lexer and
# the first token is returned.
def readtok_longstr
  t = @lexer.readtok
  nt = nil

  if t and t.type == :string and t.raw == 'L'
    nt = @lexer.readtok
    if nt and nt.type == :quoted and nt.raw[0] == ?"
      nt.raw[0, 0] = 'L'
      return nt
    end
  elsif t and t.type == :punct and t.raw == '/'
    nt = @lexer.readtok
    if nt and nt.type == :punct and nt.raw == '/'
      # windows.h has a #define some_type_name /##/, and VS interprets this as a comment..
      puts @lexer.exception('#defined //').message if $VERBOSE
      t = @lexer.readtok while t and t.type != :eol
      return t
    end
  end

  # no special form: give back the lookahead (if any)
  @lexer.unreadtok nt
  t
end