module XSD::Charset

Constants

CharsetMap
CharsetStrCache
EUCRegexp
EncodingConvertMap

Maps

SJISRegexp
USASCIIRegexp
UTF8Regexp

Public Class Methods

charset_label(encoding) click to toggle source
# File lib/xsd/charset.rb, line 118
# Returns the CharsetMap entry stored under the upper-cased form of
# +encoding+ (e.g. an internal encoding name such as the ones handled by
# is_ces). The result is whatever CharsetMap#[] yields for that key.
#
# +encoding+ must respond to #upcase.
def Charset.charset_label(encoding)
  map_key = encoding.upcase
  CharsetMap[map_key]
end
charset_str(label) click to toggle source
# File lib/xsd/charset.rb, line 122
# Reverse lookup on CharsetMap: returns the key whose value equals the
# down-cased +label+, or 'X_UNKNOWN' when no such key is found.
#
# The answer is memoized in CharsetStrCache under +label+ exactly as it was
# passed in (the cache key is not down-cased), so the reverse lookup only
# runs while the cached entry is missing or falsy.
def Charset.charset_str(label)
  # Use #key when CharsetMap offers it; otherwise fall back to #index,
  # which is the same reverse lookup under its older name.
  reverse_lookup = CharsetMap.respond_to?(:key) ? :key : :index
  CharsetStrCache[label] ||=
    CharsetMap.__send__(reverse_lookup, label.downcase) || 'X_UNKNOWN'
end
encoding() click to toggle source

handlers

# File lib/xsd/charset.rb, line 86
# Reader for the module-level @internal_encoding value, i.e. whatever was
# last stored by Charset.encoding= or by Charset.init ('UTF8' when one of
# the Unicode-capable converter libraries could be loaded).
def Charset.encoding
  @internal_encoding
end
encoding=(encoding) click to toggle source
# File lib/xsd/charset.rb, line 90
# Stores +encoding+ as the module's internal encoding (@internal_encoding).
# When Ruby runs in debug mode ($DEBUG is set) the change is also reported
# through Kernel#warn.
def Charset.encoding=(encoding)
  if $DEBUG
    warn("xsd charset is set to #{encoding}")
  end
  @internal_encoding = encoding
end
encoding_conv(str, enc_from, enc_to) click to toggle source
# File lib/xsd/charset.rb, line 107
# Converts +str+ from +enc_from+ to +enc_to+ using the converter registered
# in EncodingConvertMap under the pair [enc_from, enc_to].
#
# +str+ is returned untouched when both encodings are equal or when either
# side is 'NONE'. Raises CharsetConversionError when a conversion is needed
# but no converter is registered for the pair.
def Charset.encoding_conv(str, enc_from, enc_to)
  # Nothing to convert: identical encodings, or one side is 'NONE'.
  return str if enc_from == enc_to || enc_from == 'NONE' || enc_to == 'NONE'

  converter = EncodingConvertMap[[enc_from, enc_to]]
  unless converter
    raise CharsetConversionError.new(
      "Converter not found: #{enc_from} -> #{enc_to}")
  end
  converter.call(str)
end
encoding_from_xml(str, charset) click to toggle source
# File lib/xsd/charset.rb, line 103
# Converts +str+ from the encoding named by the label +charset+ into the
# module's internal encoding. The label is first translated with
# charset_str; the conversion itself is delegated to encoding_conv, so its
# pass-through rules and CharsetConversionError apply here as well.
def Charset.encoding_from_xml(str, charset)
  source_encoding = charset_str(charset)
  encoding_conv(str, source_encoding, @internal_encoding)
end
encoding_to_xml(str, charset) click to toggle source
# File lib/xsd/charset.rb, line 99
# Converts +str+ from the module's internal encoding into the encoding
# named by the label +charset+. The label is first translated with
# charset_str; the conversion itself is delegated to encoding_conv, so its
# pass-through rules and CharsetConversionError apply here as well.
def Charset.encoding_to_xml(str, charset)
  target_encoding = charset_str(charset)
  encoding_conv(str, @internal_encoding, target_encoding)
end
init() click to toggle source
# File lib/xsd/charset.rb, line 26
# Fills EncodingConvertMap with the converters that can be provided by the
# libraries available at runtime. Each entry maps a [from, to] pair of
# encoding names to a callable taking the string to convert.
#
# * UTF8 <-> X_ISO_8859_1 is always registered (pure pack/unpack).
# * If 'xsd/iconvcharset' loads, all UTF8/EUC/SJIS conversions are
#   registered through IconvCharset.safe_iconv and the internal encoding
#   becomes 'UTF8'.
# * Otherwise 'nkf' (EUC <-> SJIS) and 'uconv' (UTF8 <-> EUC/SJIS, which
#   also sets the internal encoding to 'UTF8') are tried independently;
#   a missing library is silently skipped.
def Charset.init
  EncodingConvertMap[['UTF8', 'X_ISO_8859_1']] =
    Proc.new { |text| text.unpack('U*').pack('C*') }
  EncodingConvertMap[['X_ISO_8859_1', 'UTF8']] =
    Proc.new { |text| text.unpack('C*').pack('U*') }

  begin
    require 'xsd/iconvcharset'
    @internal_encoding = 'UTF8'

    # Platforms matched here get the 'cp932' name for Shift_JIS.
    windows_like = /(mswin|bccwin|mingw|cygwin|emx)/ =~ RUBY_PLATFORM
    sjtag = windows_like ? 'cp932' : 'shift_jis'

    # Rows are: map key (from, to), then the first and second name handed
    # to safe_iconv. The first name always corresponds to the destination.
    [
      ['UTF8', 'EUC',  'euc-jp', 'utf-8'],
      ['EUC',  'UTF8', 'utf-8',  'euc-jp'],
      ['EUC',  'SJIS', sjtag,    'euc-jp'],
      ['UTF8', 'SJIS', sjtag,    'utf-8'],
      ['SJIS', 'UTF8', 'utf-8',  sjtag],
      ['SJIS', 'EUC',  'euc-jp', sjtag]
    ].each do |src, dst, first_name, second_name|
      EncodingConvertMap[[src, dst]] =
        Proc.new { |text| IconvCharset.safe_iconv(first_name, second_name, text) }
    end
  rescue LoadError
    # No iconv wrapper: register whatever nkf and uconv can offer. Each
    # library is optional, so a failed require is deliberately ignored.
    begin
      require 'nkf'
      EncodingConvertMap[['EUC', 'SJIS']] =
        Proc.new { |text| NKF.nkf('-sXm0', text) }
      EncodingConvertMap[['SJIS', 'EUC']] =
        Proc.new { |text| NKF.nkf('-eXm0', text) }
    rescue LoadError
    end

    begin
      require 'uconv'
      @internal_encoding = 'UTF8'
      [
        ['UTF8', 'EUC',  :u8toeuc],
        ['UTF8', 'SJIS', :u8tosjis],
        ['EUC',  'UTF8', :euctou8],
        ['SJIS', 'UTF8', :sjistou8]
      ].each do |src, dst, uconv_name|
        EncodingConvertMap[[src, dst]] = Uconv.method(uconv_name)
      end
    rescue LoadError
    end
  end
end
is_ces(str, code = @internal_encoding) click to toggle source
# File lib/xsd/charset.rb, line 172
# Checks +str+ against the pattern belonging to the encoding name +code+
# (defaults to the module's internal encoding) by dispatching to the
# matching is_* helper:
#
#   'NONE' -> is_us_ascii, 'UTF8' -> is_utf8,
#   'EUC'  -> is_euc,      'SJIS' -> is_sjis
#
# Returns the helper's result. Raises UnknownCharsetError for any other
# +code+.
def Charset.is_ces(str, code = @internal_encoding)
  # Literal on the left keeps the comparison identical to a `when` clause.
  return is_us_ascii(str) if 'NONE' == code
  return is_utf8(str) if 'UTF8' == code
  return is_euc(str) if 'EUC' == code
  return is_sjis(str) if 'SJIS' == code

  raise UnknownCharsetError.new("Unknown charset: #{code}")
end
is_euc(str) click to toggle source
# File lib/xsd/charset.rb, line 164
# Matches +str+ against EUCRegexp and returns the result of =~ as is:
# the match offset on success (an Integer, truthy even when it is 0) or
# nil when there is no match. Callers should treat it as truthy/falsy
# rather than compare it with true.
def Charset.is_euc(str)
  EUCRegexp =~ str
end
is_sjis(str) click to toggle source
# File lib/xsd/charset.rb, line 168
# Matches +str+ against SJISRegexp and returns the result of =~ as is:
# the match offset on success (an Integer, truthy even when it is 0) or
# nil when there is no match. Callers should treat it as truthy/falsy
# rather than compare it with true.
def Charset.is_sjis(str)
  SJISRegexp =~ str
end
is_us_ascii(str) click to toggle source
# File lib/xsd/charset.rb, line 156
# Matches +str+ against USASCIIRegexp and returns the result of =~ as is:
# the match offset on success (an Integer, truthy even when it is 0) or
# nil when there is no match. Callers should treat it as truthy/falsy
# rather than compare it with true.
def Charset.is_us_ascii(str)
  USASCIIRegexp =~ str
end
is_utf8(str) click to toggle source
# File lib/xsd/charset.rb, line 160
# Matches +str+ against UTF8Regexp and returns the result of =~ as is:
# the match offset on success (an Integer, truthy even when it is 0) or
# nil when there is no match. Callers should treat it as truthy/falsy
# rather than compare it with true.
def Charset.is_utf8(str)
  UTF8Regexp =~ str
end
xml_encoding_label() click to toggle source
# File lib/xsd/charset.rb, line 95
# Returns the charset label that corresponds to the module's current
# internal encoding, as resolved by charset_label.
def Charset.xml_encoding_label
  current_encoding = @internal_encoding
  charset_label(current_encoding)
end