00001
00002
00003
00004
00005
00006 #if !defined(JSON_IS_AMALGAMATION)
00007 # include <json/reader.h>
00008 # include <json/value.h>
00009 # include "json_tool.h"
00010 #endif // if !defined(JSON_IS_AMALGAMATION)
00011 #include <utility>
00012 #include <cstdio>
00013 #include <cassert>
00014 #include <cstring>
00015 #include <iostream>
00016 #include <stdexcept>
00017
00018 #if _MSC_VER >= 1400 // VC++ 8.0
00019 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
00020 #endif
00021
00022 namespace Json {
00023
00024
00025
00026
00027 Features::Features()
00028 : allowComments_( true )
00029 , strictRoot_( false )
00030 {
00031 }
00032
00033
00034 Features
00035 Features::all()
00036 {
00037 return Features();
00038 }
00039
00040
00041 Features
00042 Features::strictMode()
00043 {
00044 Features features;
00045 features.allowComments_ = false;
00046 features.strictRoot_ = true;
00047 return features;
00048 }
00049
00050
00051
00052
00053
00054 static inline bool
00055 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
00056 {
00057 return c == c1 || c == c2 || c == c3 || c == c4;
00058 }
00059
00060 static inline bool
00061 in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
00062 {
00063 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
00064 }
00065
00066
00067 static bool
00068 containsNewLine( Reader::Location begin,
00069 Reader::Location end )
00070 {
00071 for ( ;begin < end; ++begin )
00072 if ( *begin == '\n' || *begin == '\r' )
00073 return true;
00074 return false;
00075 }
00076
00077
00078
00079
00080
00081 Reader::Reader()
00082 : features_( Features::all() )
00083 {
00084 }
00085
00086
00087 Reader::Reader( const Features &features )
00088 : features_( features )
00089 {
00090 }
00091
00092
00093 bool
00094 Reader::parse( const std::string &document,
00095 Value &root,
00096 bool collectComments )
00097 {
00098 document_ = document;
00099 const char *begin = document_.c_str();
00100 const char *end = begin + document_.length();
00101 return parse( begin, end, root, collectComments );
00102 }
00103
00104
00105 bool
00106 Reader::parse( std::istream& sin,
00107 Value &root,
00108 bool collectComments )
00109 {
00110
00111
00112
00113
00114
00115
00116
00117 std::string doc;
00118 std::getline(sin, doc, (char)EOF);
00119 return parse( doc, root, collectComments );
00120 }
00121
00122 bool
00123 Reader::parse( const char *beginDoc, const char *endDoc,
00124 Value &root,
00125 bool collectComments )
00126 {
00127 if ( !features_.allowComments_ )
00128 {
00129 collectComments = false;
00130 }
00131
00132 begin_ = beginDoc;
00133 end_ = endDoc;
00134 collectComments_ = collectComments;
00135 current_ = begin_;
00136 lastValueEnd_ = 0;
00137 lastValue_ = 0;
00138 commentsBefore_ = "";
00139 errors_.clear();
00140 while ( !nodes_.empty() )
00141 nodes_.pop();
00142 nodes_.push( &root );
00143
00144 bool successful = readValue();
00145 Token token;
00146 skipCommentTokens( token );
00147 if ( collectComments_ && !commentsBefore_.empty() )
00148 root.setComment( commentsBefore_, commentAfter );
00149 if ( features_.strictRoot_ )
00150 {
00151 if ( !root.isArray() && !root.isObject() )
00152 {
00153
00154 token.type_ = tokenError;
00155 token.start_ = beginDoc;
00156 token.end_ = endDoc;
00157 addError( "A valid JSON document must be either an array or an object value.",
00158 token );
00159 return false;
00160 }
00161 }
00162 return successful;
00163 }
00164
00165
00166 bool
00167 Reader::readValue()
00168 {
00169 Token token;
00170 skipCommentTokens( token );
00171 bool successful = true;
00172
00173 if ( collectComments_ && !commentsBefore_.empty() )
00174 {
00175 currentValue().setComment( commentsBefore_, commentBefore );
00176 commentsBefore_ = "";
00177 }
00178
00179
00180 switch ( token.type_ )
00181 {
00182 case tokenObjectBegin:
00183 successful = readObject( token );
00184 break;
00185 case tokenArrayBegin:
00186 successful = readArray( token );
00187 break;
00188 case tokenNumber:
00189 successful = decodeNumber( token );
00190 break;
00191 case tokenString:
00192 successful = decodeString( token );
00193 break;
00194 case tokenTrue:
00195 currentValue() = true;
00196 break;
00197 case tokenFalse:
00198 currentValue() = false;
00199 break;
00200 case tokenNull:
00201 currentValue() = Value();
00202 break;
00203 default:
00204 return addError( "Syntax error: value, object or array expected.", token );
00205 }
00206
00207 if ( collectComments_ )
00208 {
00209 lastValueEnd_ = current_;
00210 lastValue_ = ¤tValue();
00211 }
00212
00213 return successful;
00214 }
00215
00216
00217 void
00218 Reader::skipCommentTokens( Token &token )
00219 {
00220 if ( features_.allowComments_ )
00221 {
00222 do
00223 {
00224 readToken( token );
00225 }
00226 while ( token.type_ == tokenComment );
00227 }
00228 else
00229 {
00230 readToken( token );
00231 }
00232 }
00233
00234
00235 bool
00236 Reader::expectToken( TokenType type, Token &token, const char *message )
00237 {
00238 readToken( token );
00239 if ( token.type_ != type )
00240 return addError( message, token );
00241 return true;
00242 }
00243
00244
00245 bool
00246 Reader::readToken( Token &token )
00247 {
00248 skipSpaces();
00249 token.start_ = current_;
00250 Char c = getNextChar();
00251 bool ok = true;
00252 switch ( c )
00253 {
00254 case '{':
00255 token.type_ = tokenObjectBegin;
00256 break;
00257 case '}':
00258 token.type_ = tokenObjectEnd;
00259 break;
00260 case '[':
00261 token.type_ = tokenArrayBegin;
00262 break;
00263 case ']':
00264 token.type_ = tokenArrayEnd;
00265 break;
00266 case '"':
00267 token.type_ = tokenString;
00268 ok = readString();
00269 break;
00270 case '/':
00271 token.type_ = tokenComment;
00272 ok = readComment();
00273 break;
00274 case '0':
00275 case '1':
00276 case '2':
00277 case '3':
00278 case '4':
00279 case '5':
00280 case '6':
00281 case '7':
00282 case '8':
00283 case '9':
00284 case '-':
00285 token.type_ = tokenNumber;
00286 readNumber();
00287 break;
00288 case 't':
00289 token.type_ = tokenTrue;
00290 ok = match( "rue", 3 );
00291 break;
00292 case 'f':
00293 token.type_ = tokenFalse;
00294 ok = match( "alse", 4 );
00295 break;
00296 case 'n':
00297 token.type_ = tokenNull;
00298 ok = match( "ull", 3 );
00299 break;
00300 case ',':
00301 token.type_ = tokenArraySeparator;
00302 break;
00303 case ':':
00304 token.type_ = tokenMemberSeparator;
00305 break;
00306 case 0:
00307 token.type_ = tokenEndOfStream;
00308 break;
00309 default:
00310 ok = false;
00311 break;
00312 }
00313 if ( !ok )
00314 token.type_ = tokenError;
00315 token.end_ = current_;
00316 return true;
00317 }
00318
00319
00320 void
00321 Reader::skipSpaces()
00322 {
00323 while ( current_ != end_ )
00324 {
00325 Char c = *current_;
00326 if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
00327 ++current_;
00328 else
00329 break;
00330 }
00331 }
00332
00333
00334 bool
00335 Reader::match( Location pattern,
00336 int patternLength )
00337 {
00338 if ( end_ - current_ < patternLength )
00339 return false;
00340 int index = patternLength;
00341 while ( index-- )
00342 if ( current_[index] != pattern[index] )
00343 return false;
00344 current_ += patternLength;
00345 return true;
00346 }
00347
00348
00349 bool
00350 Reader::readComment()
00351 {
00352 Location commentBegin = current_ - 1;
00353 Char c = getNextChar();
00354 bool successful = false;
00355 if ( c == '*' )
00356 successful = readCStyleComment();
00357 else if ( c == '/' )
00358 successful = readCppStyleComment();
00359 if ( !successful )
00360 return false;
00361
00362 if ( collectComments_ )
00363 {
00364 CommentPlacement placement = commentBefore;
00365 if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
00366 {
00367 if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
00368 placement = commentAfterOnSameLine;
00369 }
00370
00371 addComment( commentBegin, current_, placement );
00372 }
00373 return true;
00374 }
00375
00376
00377 void
00378 Reader::addComment( Location begin,
00379 Location end,
00380 CommentPlacement placement )
00381 {
00382 assert( collectComments_ );
00383 if ( placement == commentAfterOnSameLine )
00384 {
00385 assert( lastValue_ != 0 );
00386 lastValue_->setComment( std::string( begin, end ), placement );
00387 }
00388 else
00389 {
00390 if ( !commentsBefore_.empty() )
00391 commentsBefore_ += "\n";
00392 commentsBefore_ += std::string( begin, end );
00393 }
00394 }
00395
00396
00397 bool
00398 Reader::readCStyleComment()
00399 {
00400 while ( current_ != end_ )
00401 {
00402 Char c = getNextChar();
00403 if ( c == '*' && *current_ == '/' )
00404 break;
00405 }
00406 return getNextChar() == '/';
00407 }
00408
00409
00410 bool
00411 Reader::readCppStyleComment()
00412 {
00413 while ( current_ != end_ )
00414 {
00415 Char c = getNextChar();
00416 if ( c == '\r' || c == '\n' )
00417 break;
00418 }
00419 return true;
00420 }
00421
00422
00423 void
00424 Reader::readNumber()
00425 {
00426 while ( current_ != end_ )
00427 {
00428 if ( !(*current_ >= '0' && *current_ <= '9') &&
00429 !in( *current_, '.', 'e', 'E', '+', '-' ) )
00430 break;
00431 ++current_;
00432 }
00433 }
00434
00435 bool
00436 Reader::readString()
00437 {
00438 Char c = 0;
00439 while ( current_ != end_ )
00440 {
00441 c = getNextChar();
00442 if ( c == '\\' )
00443 getNextChar();
00444 else if ( c == '"' )
00445 break;
00446 }
00447 return c == '"';
00448 }
00449
00450
00451 bool
00452 Reader::readObject( Token & )
00453 {
00454 Token tokenName;
00455 std::string name;
00456 currentValue() = Value( objectValue );
00457 while ( readToken( tokenName ) )
00458 {
00459 bool initialTokenOk = true;
00460 while ( tokenName.type_ == tokenComment && initialTokenOk )
00461 initialTokenOk = readToken( tokenName );
00462 if ( !initialTokenOk )
00463 break;
00464 if ( tokenName.type_ == tokenObjectEnd && name.empty() )
00465 return true;
00466 if ( tokenName.type_ != tokenString )
00467 break;
00468
00469 name = "";
00470 if ( !decodeString( tokenName, name ) )
00471 return recoverFromError( tokenObjectEnd );
00472
00473 Token colon;
00474 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
00475 {
00476 return addErrorAndRecover( "Missing ':' after object member name",
00477 colon,
00478 tokenObjectEnd );
00479 }
00480 Value &value = currentValue()[ name ];
00481 nodes_.push( &value );
00482 bool ok = readValue();
00483 nodes_.pop();
00484 if ( !ok )
00485 return recoverFromError( tokenObjectEnd );
00486
00487 Token comma;
00488 if ( !readToken( comma )
00489 || ( comma.type_ != tokenObjectEnd &&
00490 comma.type_ != tokenArraySeparator &&
00491 comma.type_ != tokenComment ) )
00492 {
00493 return addErrorAndRecover( "Missing ',' or '}' in object declaration",
00494 comma,
00495 tokenObjectEnd );
00496 }
00497 bool finalizeTokenOk = true;
00498 while ( comma.type_ == tokenComment &&
00499 finalizeTokenOk )
00500 finalizeTokenOk = readToken( comma );
00501 if ( comma.type_ == tokenObjectEnd )
00502 return true;
00503 }
00504 return addErrorAndRecover( "Missing '}' or object member name",
00505 tokenName,
00506 tokenObjectEnd );
00507 }
00508
00509
00510 bool
00511 Reader::readArray( Token & )
00512 {
00513 currentValue() = Value( arrayValue );
00514 skipSpaces();
00515 if ( *current_ == ']' )
00516 {
00517 Token endArray;
00518 readToken( endArray );
00519 return true;
00520 }
00521 int index = 0;
00522 for (;;)
00523 {
00524 Value &value = currentValue()[ index++ ];
00525 nodes_.push( &value );
00526 bool ok = readValue();
00527 nodes_.pop();
00528 if ( !ok )
00529 return recoverFromError( tokenArrayEnd );
00530
00531 Token token;
00532
00533 ok = readToken( token );
00534 while ( token.type_ == tokenComment && ok )
00535 {
00536 ok = readToken( token );
00537 }
00538 bool badTokenType = ( token.type_ != tokenArraySeparator &&
00539 token.type_ != tokenArrayEnd );
00540 if ( !ok || badTokenType )
00541 {
00542 return addErrorAndRecover( "Missing ',' or ']' in array declaration",
00543 token,
00544 tokenArrayEnd );
00545 }
00546 if ( token.type_ == tokenArrayEnd )
00547 break;
00548 }
00549 return true;
00550 }
00551
00552
00553 bool
00554 Reader::decodeNumber( Token &token )
00555 {
00556 bool isDouble = false;
00557 for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
00558 {
00559 isDouble = isDouble
00560 || in( *inspect, '.', 'e', 'E', '+' )
00561 || ( *inspect == '-' && inspect != token.start_ );
00562 }
00563 if ( isDouble )
00564 return decodeDouble( token );
00565
00566
00567
00568 Location current = token.start_;
00569 bool isNegative = *current == '-';
00570 if ( isNegative )
00571 ++current;
00572 Value::LargestUInt maxIntegerValue = isNegative ? Value::LargestUInt(-Value::minLargestInt)
00573 : Value::maxLargestUInt;
00574 Value::LargestUInt threshold = maxIntegerValue / 10;
00575 Value::UInt lastDigitThreshold = Value::UInt( maxIntegerValue % 10 );
00576 assert( lastDigitThreshold >=0 && lastDigitThreshold <= 9 );
00577 Value::LargestUInt value = 0;
00578 while ( current < token.end_ )
00579 {
00580 Char c = *current++;
00581 if ( c < '0' || c > '9' )
00582 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
00583 Value::UInt digit(c - '0');
00584 if ( value >= threshold )
00585 {
00586
00587
00588
00589 if ( current != token.end_ || digit > lastDigitThreshold )
00590 {
00591 return decodeDouble( token );
00592 }
00593 }
00594 value = value * 10 + digit;
00595 }
00596 if ( isNegative )
00597 currentValue() = -Value::LargestInt( value );
00598 else if ( value <= Value::LargestUInt(Value::maxInt) )
00599 currentValue() = Value::LargestInt( value );
00600 else
00601 currentValue() = value;
00602 return true;
00603 }
00604
00605
00606 bool
00607 Reader::decodeDouble( Token &token )
00608 {
00609 double value = 0;
00610 const int bufferSize = 32;
00611 int count;
00612 int length = int(token.end_ - token.start_);
00613 if ( length <= bufferSize )
00614 {
00615 Char buffer[bufferSize+1];
00616 memcpy( buffer, token.start_, length );
00617 buffer[length] = 0;
00618 count = sscanf( buffer, "%lf", &value );
00619 }
00620 else
00621 {
00622 std::string buffer( token.start_, token.end_ );
00623 count = sscanf( buffer.c_str(), "%lf", &value );
00624 }
00625
00626 if ( count != 1 )
00627 return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
00628 currentValue() = value;
00629 return true;
00630 }
00631
00632
00633 bool
00634 Reader::decodeString( Token &token )
00635 {
00636 std::string decoded;
00637 if ( !decodeString( token, decoded ) )
00638 return false;
00639 currentValue() = decoded;
00640 return true;
00641 }
00642
00643
00644 bool
00645 Reader::decodeString( Token &token, std::string &decoded )
00646 {
00647 decoded.reserve( token.end_ - token.start_ - 2 );
00648 Location current = token.start_ + 1;
00649 Location end = token.end_ - 1;
00650 while ( current != end )
00651 {
00652 Char c = *current++;
00653 if ( c == '"' )
00654 break;
00655 else if ( c == '\\' )
00656 {
00657 if ( current == end )
00658 return addError( "Empty escape sequence in string", token, current );
00659 Char escape = *current++;
00660 switch ( escape )
00661 {
00662 case '"': decoded += '"'; break;
00663 case '/': decoded += '/'; break;
00664 case '\\': decoded += '\\'; break;
00665 case 'b': decoded += '\b'; break;
00666 case 'f': decoded += '\f'; break;
00667 case 'n': decoded += '\n'; break;
00668 case 'r': decoded += '\r'; break;
00669 case 't': decoded += '\t'; break;
00670 case 'u':
00671 {
00672 unsigned int unicode;
00673 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
00674 return false;
00675 decoded += codePointToUTF8(unicode);
00676 }
00677 break;
00678 default:
00679 return addError( "Bad escape sequence in string", token, current );
00680 }
00681 }
00682 else
00683 {
00684 decoded += c;
00685 }
00686 }
00687 return true;
00688 }
00689
00690 bool
00691 Reader::decodeUnicodeCodePoint( Token &token,
00692 Location ¤t,
00693 Location end,
00694 unsigned int &unicode )
00695 {
00696
00697 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
00698 return false;
00699 if (unicode >= 0xD800 && unicode <= 0xDBFF)
00700 {
00701
00702 if (end - current < 6)
00703 return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
00704 unsigned int surrogatePair;
00705 if (*(current++) == '\\' && *(current++)== 'u')
00706 {
00707 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
00708 {
00709 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00710 }
00711 else
00712 return false;
00713 }
00714 else
00715 return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
00716 }
00717 return true;
00718 }
00719
00720 bool
00721 Reader::decodeUnicodeEscapeSequence( Token &token,
00722 Location ¤t,
00723 Location end,
00724 unsigned int &unicode )
00725 {
00726 if ( end - current < 4 )
00727 return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
00728 unicode = 0;
00729 for ( int index =0; index < 4; ++index )
00730 {
00731 Char c = *current++;
00732 unicode *= 16;
00733 if ( c >= '0' && c <= '9' )
00734 unicode += c - '0';
00735 else if ( c >= 'a' && c <= 'f' )
00736 unicode += c - 'a' + 10;
00737 else if ( c >= 'A' && c <= 'F' )
00738 unicode += c - 'A' + 10;
00739 else
00740 return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
00741 }
00742 return true;
00743 }
00744
00745
00746 bool
00747 Reader::addError( const std::string &message,
00748 Token &token,
00749 Location extra )
00750 {
00751 ErrorInfo info;
00752 info.token_ = token;
00753 info.message_ = message;
00754 info.extra_ = extra;
00755 errors_.push_back( info );
00756 return false;
00757 }
00758
00759
00760 bool
00761 Reader::recoverFromError( TokenType skipUntilToken )
00762 {
00763 int errorCount = int(errors_.size());
00764 Token skip;
00765 for (;;)
00766 {
00767 if ( !readToken(skip) )
00768 errors_.resize( errorCount );
00769 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
00770 break;
00771 }
00772 errors_.resize( errorCount );
00773 return false;
00774 }
00775
00776
00777 bool
00778 Reader::addErrorAndRecover( const std::string &message,
00779 Token &token,
00780 TokenType skipUntilToken )
00781 {
00782 addError( message, token );
00783 return recoverFromError( skipUntilToken );
00784 }
00785
00786
00787 Value &
00788 Reader::currentValue()
00789 {
00790 return *(nodes_.top());
00791 }
00792
00793
00794 Reader::Char
00795 Reader::getNextChar()
00796 {
00797 if ( current_ == end_ )
00798 return 0;
00799 return *current_++;
00800 }
00801
00802
00803 void
00804 Reader::getLocationLineAndColumn( Location location,
00805 int &line,
00806 int &column ) const
00807 {
00808 Location current = begin_;
00809 Location lastLineStart = current;
00810 line = 0;
00811 while ( current < location && current != end_ )
00812 {
00813 Char c = *current++;
00814 if ( c == '\r' )
00815 {
00816 if ( *current == '\n' )
00817 ++current;
00818 lastLineStart = current;
00819 ++line;
00820 }
00821 else if ( c == '\n' )
00822 {
00823 lastLineStart = current;
00824 ++line;
00825 }
00826 }
00827
00828 column = int(location - lastLineStart) + 1;
00829 ++line;
00830 }
00831
00832
00833 std::string
00834 Reader::getLocationLineAndColumn( Location location ) const
00835 {
00836 int line, column;
00837 getLocationLineAndColumn( location, line, column );
00838 char buffer[18+16+16+1];
00839 sprintf( buffer, "Line %d, Column %d", line, column );
00840 return buffer;
00841 }
00842
00843
00844
00845 std::string
00846 Reader::getFormatedErrorMessages() const
00847 {
00848 return getFormattedErrorMessages();
00849 }
00850
00851
00852 std::string
00853 Reader::getFormattedErrorMessages() const
00854 {
00855 std::string formattedMessage;
00856 for ( Errors::const_iterator itError = errors_.begin();
00857 itError != errors_.end();
00858 ++itError )
00859 {
00860 const ErrorInfo &error = *itError;
00861 formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
00862 formattedMessage += " " + error.message_ + "\n";
00863 if ( error.extra_ )
00864 formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
00865 }
00866 return formattedMessage;
00867 }
00868
00869
00870 std::istream& operator>>( std::istream &sin, Value &root )
00871 {
00872 Json::Reader reader;
00873 bool ok = reader.parse(sin, root, true);
00874
00875 if (!ok) throw std::runtime_error(reader.getFormattedErrorMessages());
00876 return sin;
00877 }
00878
00879
00880 }