# If you're interested in this you might also like to check out
# SafeJSON
# http://www.xerial.org/projects/Xerial/browser/ruby/trunk/aqua/mixi/SafeJSON.rb?format=txt

require 'strscan'

# This module is the namespace for all the JSON related classes. It also
# defines some module functions to expose a nicer API to users, instead
# of using the parser and other classes directly.
module JSON
  # The base exception for JSON errors.
  JSONError = Class.new StandardError

  # This exception is raised if a parser error occurs.
  ParserError = Class.new JSONError

  # This exception is raised if an unparser error occurs.
  UnparserError = Class.new JSONError

  # If a circular data structure is encountered while unparsing
  # this exception is raised.
  CircularDatastructure = Class.new UnparserError

  class << self
    # Switches on Unicode support, if _enable_ is _true_. Otherwise switches
    # Unicode support off.
    def support_unicode=(enable)
      @support_unicode = enable
    end

    # Returns _true_ if JSON supports unicode, otherwise _false_ is returned.
    def support_unicode?
      !!@support_unicode
    end
  end

  JSON.support_unicode = true # default, however it's possible to switch off
                              # full unicode support, if non-ascii bytes
                              # should be just passed through.

  begin
    require 'iconv'
    # An iconv instance to convert from UTF16 Big Endian to UTF8
    # (Iconv.new takes the target encoding first, then the source encoding).
    UTF16toUTF8 = Iconv.new('utf-8', 'utf-16be')
    # An iconv instance to convert from UTF8 to UTF16 Big Endian.
    UTF8toUTF16 = Iconv.new('utf-16be', 'utf-8'); UTF8toUTF16.iconv('no bom')
  rescue LoadError
    JSON.support_unicode = false # enforce disabling of unicode support
  end

  # This class implements the JSON parser that is used to parse a JSON string
  # into a Ruby data structure.
  class Parser < StringScanner
    STRING                = /"((?:[^"\\]|\\.)*)"/
    INTEGER               = /-?\d+/
    # A float needs either a fraction with at least one digit (optionally
    # followed by an exponent) or a bare exponent, so that "1e3" is a float
    # and "1." is never handed to Kernel#Float.
    FLOAT                 = /-?\d+(?:\.\d+(?i:e[+-]?\d+)?|(?i:e[+-]?\d+))/
    OBJECT_OPEN           = /\{/
    OBJECT_CLOSE          = /\}/
    ARRAY_OPEN            = /\[/
    ARRAY_CLOSE           = /\]/
    PAIR_DELIMITER        = /:/
    COLLECTION_DELIMITER  = /,/
    TRUE                  = /true/
    FALSE                 = /false/
    NULL                  = /null/
    IGNORE                = %r(
      (?:
        //[^\n\r]*[\n\r]| # line comments
        /\*               # c-style comments
        (?:
          [^*/]|          # normal chars
          /[^*]|          # slashes that do not start a nested comment
          \*[^/]|         # asterisks that do not end this comment
          /(?=\*/)        # single slash before this comment's end
        )*
        \*/               # the end of this comment
        |\s+              # whitespaces
      )+
    )mx

    # Sentinel returned by the parse_* helpers when nothing could be scanned
    # at the current position. It is always compared by identity (equal?),
    # so a parsed value with an unusual == can never be mistaken for it.
    UNPARSED = Object.new

    # Parses the current JSON string and returns the complete data structure
    # as a result. Scanning restarts from the beginning of the string, and the
    # method returns as soon as the first value has been parsed; whatever
    # follows that value is not inspected. Returns _nil_ if the string holds
    # nothing but whitespace and comments. Raises ParserError if the source
    # does not start with something that can be parsed.
    def parse
      reset
      until eos?
        case
        when scan(ARRAY_OPEN)
          return parse_array
        when scan(OBJECT_OPEN)
          return parse_object
        when skip(IGNORE)
          # whitespace or a comment was consumed, keep looking
        when !(value = parse_value).equal?(UNPARSED)
          return value
        else
          raise ParserError, "source '#{peek(20)}' not in JSON!"
        end
      end
    end

    private

    # Scans a JSON string literal at the current position and returns its
    # unescaped content, or UNPARSED if there is no string literal here.
    #
    # Handles the escapes \\, \/, \b, \f, \n, \r, \t, \" and \uXXXX. A run of
    # consecutive \uXXXX escapes is decoded in one go, so that a UTF-16
    # surrogate pair reaches the converter as a whole instead of as two
    # unconvertible halves.
    def parse_string
      return UNPARSED unless scan(STRING)
      return '' if self[1].empty?
      self[1].gsub(/\\(?:[\\\/bfnrt"]|u[A-Fa-f\d]{4}(?:\\u[A-Fa-f\d]{4})*)/) do |escape|
        case escape
        when '\\\\' then '\\'
        when '\\/'  then '/'
        when '\\b'  then "\b"
        when '\\f'  then "\f"
        when '\\n'  then "\n"
        when '\\r'  then "\r"
        when '\\t'  then "\t"
        when '\\"'  then '"'
        else
          # One four-digit hex string per UTF-16 code unit in the run.
          units = escape.scan(/[A-Fa-f\d]{4}/)
          if JSON.support_unicode? and $KCODE == 'UTF8'
            JSON.utf16_to_utf8(units.join)
          else
            # if utf8 mode is switched off or unicode not supported, try to
            # transform unicode \u-notation to bytes directly (Integer#chr
            # is used, so only values that fit into one byte can succeed):
            units.map { |unit| unit.to_i(16).chr }.join
          end
        end
      end
    end

    # Scans a single JSON value (number, true, false, null, string, array or
    # object) at the current position and returns the matching Ruby value, or
    # UNPARSED if no value starts here.
    def parse_value
      case
      when scan(FLOAT)
        Float(self[0])
      when scan(INTEGER)
        # String#to_i always reads decimal; Kernel#Integer would treat a
        # leading zero as an octal prefix ("010" => 8, "09" => ArgumentError).
        self[0].to_i
      when scan(TRUE)
        true
      when scan(FALSE)
        false
      when scan(NULL)
        nil
      when !(string = parse_string).equal?(UNPARSED)
        string
      when scan(ARRAY_OPEN)
        parse_array
      when scan(OBJECT_OPEN)
        parse_object
      else
        UNPARSED
      end
    end

    # Parses the elements of an array whose opening bracket has already been
    # consumed, and returns them as a Ruby Array. Raises ParserError if an
    # element is not followed by ',' or ']', or if an unexpected token is
    # found.
    def parse_array
      result = []
      until eos?
        case
        when !(value = parse_value).equal?(UNPARSED)
          result << value
          skip(IGNORE)
          # The closing bracket is only peeked at here; it is consumed by the
          # ARRAY_CLOSE branch on the next iteration.
          unless scan(COLLECTION_DELIMITER) or match?(ARRAY_CLOSE)
            raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!"
          end
        when scan(ARRAY_CLOSE)
          break
        when skip(IGNORE)
          # whitespace or a comment was consumed, keep looking
        else
          raise ParserError, "unexpected token in array at '#{peek(20)}'!"
        end
      end
      result
    end

    # Parses the members of an object whose opening brace has already been
    # consumed, and returns them as a Ruby Hash with String keys.
    #
    # If the finished Hash has a String under the key 'json_class', that name
    # is looked up as a constant; when the constant exists and answers
    # json_creatable? with a true value, the result of its json_create(hash)
    # is returned instead of the Hash.
    #
    # NOTE: the class name is taken from the parsed document, so the document
    # decides which constant is looked up and handed the data. Keep that in
    # mind when parsing input from untrusted sources.
    #
    # Raises ParserError on a missing ':', a missing value, a missing ',' or
    # '}' after a member, or an unexpected token.
    def parse_object
      result = {}
      until eos?
        case
        when !(string = parse_string).equal?(UNPARSED)
          skip(IGNORE)
          unless scan(PAIR_DELIMITER)
            raise ParserError, "expected ':' in object at '#{peek(20)}'!"
          end
          skip(IGNORE)
          if (value = parse_value).equal?(UNPARSED)
            raise ParserError, "expected value in object at '#{peek(20)}'!"
          end
          result[string] = value
          skip(IGNORE)
          # The closing brace is only peeked at here; it is consumed by the
          # OBJECT_CLOSE branch on the next iteration.
          unless scan(COLLECTION_DELIMITER) or match?(OBJECT_CLOSE)
            raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!"
          end
        when scan(OBJECT_CLOSE)
          klassname = result['json_class']
          # Only a String can name a class; any other value stays plain data.
          if String === klassname
            klass = klassname.sub(/\A:+/, '').split(/::/).inject(Object) do |scope, name|
              # A failed lookup yields nil; every later lookup on nil fails
              # too, so the whole chain ends up as nil.
              begin
                scope.const_get(name)
              rescue StandardError
                nil
              end
            end
            result = klass.json_create(result) if klass and klass.json_creatable?
          end
          break
        when skip(IGNORE)
          # whitespace or a comment was consumed, keep looking
        else
          raise ParserError, "unexpected token in object at '#{peek(20)}'!"
        end
      end
      result
    end
  end

  # This class is used to create State instances, that are used to hold data
  # while unparsing a Ruby data structure into a JSON string.
  class State
    # Creates a State object from _opts_, which ought to be Hash to create a
    # new State instance configured by opts, something else to create an
    # unconfigured instance. If _opts_ is a State object, it is just returned.
    def self.from_state(opts)
      case opts
      when self
        opts
      when Hash
        new(opts)
      else
        new
      end
    end

    # Instantiates a new State object, configured by _opts_. The keys
    # :indent, :space, :object_nl and :array_nl are read; each one defaults
    # to the empty string.
    def initialize(opts = {})
      @indent     = opts[:indent]     || ''
      @space      = opts[:space]      || ''
      @object_nl  = opts[:object_nl]  || ''
      @array_nl   = opts[:array_nl]   || ''
      @seen       = {}
    end

    # This string is used to indent levels in the JSON string.
    attr_accessor :indent

    # This string is used to include a space between the tokens in a JSON
    # string.
    attr_accessor :space

    # This string is put at the end of a line that holds a JSON object (or
    # Hash).
    attr_accessor :object_nl

    # This string is put at the end of a line that holds a JSON array.
    attr_accessor :array_nl

    # Returns _true_, if _object_ was already seen during this Unparsing run.
    # Objects are tracked by their __id__, i.e. by identity, not by equality.
    def seen?(object)
      @seen.key?(object.__id__)
    end

    # Remember _object_, to find out if it was already encountered (to find out
    # if a cyclic data structure is unparsed).
    def remember(object)
      @seen[object.__id__] = true
    end

    # Forget _object_ for this Unparsing run.
    def forget(object)
      @seen.delete object.__id__
    end
  end

  module_function

  # Convert _string_ from UTF8 encoding to UTF16 (big endian) encoding and
  # return the result as a string of hexadecimal digits, four digits per
  # UTF-16 code unit.
  def utf8_to_utf16(string)
    JSON::UTF8toUTF16.iconv(string).unpack('H*')[0]
  end

  # Convert _string_, a string of hexadecimal digits describing UTF16 (big
  # endian) data with four digits per code unit, to UTF8 encoding and return
  # it. More than one code unit may be passed, which is what makes surrogate
  # pairs convertible.
  def utf16_to_utf8(string)
    JSON::UTF16toUTF8.iconv([string].pack('H*'))
  end

  # Convert a UTF8 encoded Ruby string _string_ to a JSON string, encoded with
  # UTF16 big endian characters as \u????, and return it. The surrounding
  # quotes are not added.
  #
  # NOTE: the loop walks _string_ byte by byte and treats string[i] as an
  # Integer (between?, &), i.e. it is written against Ruby 1.8 String
  # semantics.
  def utf8_to_json(string)
    i, n, result = 0, string.size, ''
    while i < n
      char = string[i]
      case
      when char == ?\b then result << '\b'
      when char == ?\t then result << '\t'
      when char == ?\n then result << '\n'
      when char == ?\f then result << '\f'
      when char == ?\r then result << '\r'
      when char == ?"  then result << '\"'
      when char == ?\\ then result << '\\\\'
      when char.between?(0x0, 0x1f) then result << "\\u%04x" % char
      when char.between?(0x20, 0x7f) then result << char
      when !(JSON.support_unicode? && $KCODE == 'UTF8')
        # if utf8 mode is switched off or unicode not supported, just pass
        # bytes through:
        result << char
      else
        # The lead byte of a multi-byte sequence tells how many bytes belong
        # to the character.
        length =
          if    (char & 0xe0) == 0xc0 then 2
          elsif (char & 0xf0) == 0xe0 then 3
          elsif (char & 0xf8) == 0xf0 then 4
          elsif (char & 0xfc) == 0xf8 then 5
          elsif (char & 0xfe) == 0xfc then 6
          else
            raise JSON::UnparserError, "Encountered unknown UTF-8 byte: %x!" % char
          end
        # Emit one \uXXXX escape per UTF-16 code unit. Characters that need a
        # surrogate pair convert to two units and must become two escapes; a
        # single \u followed by eight hex digits is not valid JSON.
        utf8_to_utf16(string[i, length]).scan(/[0-9A-Fa-f]{4}/) do |unit|
          result << '\u' << unit
        end
        i += length - 1
      end
      i += 1
    end
    result
  end

  # Parse the JSON string _source_ into a Ruby data structure and return it.
  # Raises JSON::ParserError if _source_ cannot be parsed.
  def parse(source)
    Parser.new(source).parse
  end

  # Unparse the Ruby data structure _obj_ into a single line JSON string and
  # return it. _state_ is a JSON::State object (or a Hash of State options),
  # that can be used to configure the output further. The actual work is
  # delegated to _obj_.to_json, which is expected to be defined elsewhere.
  def unparse(obj, state = nil)
    obj.to_json(JSON::State.from_state(state))
  end

  # Unparse the Ruby data structure _obj_ into a JSON string and return it.
  # The returned string is a prettier form of the string returned by #unparse.
  def pretty_unparse(obj)
    state = JSON::State.new(
      :indent     => ' ',
      :space      => ' ',
      :object_nl  => "\n",
      :array_nl   => "\n"
    )
    obj.to_json(state)
  end
end