From e39fb0083c2b013f684031f5cef8182634e121f5 Mon Sep 17 00:00:00 2001 From: Mark Zeren Date: Tue, 20 Jan 2015 02:02:33 +0000 Subject: [PATCH] Normalize comment EOLs while reading instead of while writing Tests are currently failing when git cloning on Windows with autocrlf = true. In that setup multiline comments contain \r\n EOLs. The test code assumes that comments contain \n EOLs and opens the .actual files (etc.) with "wt" which converts \n to \r\n. Thus we end up with \r\r\n EOLs in the output, which triggers a test failure. Instead we should cannonicalize comments while reading so that they contain only \n EOLs. This approach simplifies other parts of the reader and writer logic, and requires no changes to the test. It is a breaking change, but probably the Right Thing going forward. This change also fixes dereferencing past the end of the comment string in StyledWriter::writeCommentBeforeValue. Tests should be added with appropriate .gitattributes for the input files to ensure that we run tests for DOS, Mac, and Unix EOL files on all platforms. For now this change is enough to unblock Windows builds. issue #116 --- src/lib_json/json_reader.cpp | 44 ++++++++++++++++++++------- src/lib_json/json_writer.cpp | 59 +++++++----------------------------- 2 files changed, 44 insertions(+), 59 deletions(-) diff --git a/src/lib_json/json_reader.cpp b/src/lib_json/json_reader.cpp index b61f1b1..9e6d161 100644 --- a/src/lib_json/json_reader.cpp +++ b/src/lib_json/json_reader.cpp @@ -135,14 +135,9 @@ bool Reader::readValue() { bool successful = true; if (collectComments_ && !commentsBefore_.empty()) { - // Remove newline characters at the end of the comments - size_t lastNonNewline = commentsBefore_.find_last_not_of("\r\n"); - if (lastNonNewline != std::string::npos) { - commentsBefore_.erase(lastNonNewline + 1); - } else { - commentsBefore_.clear(); - } - + // Remove newline at the end of the comment + if (commentsBefore_[commentsBefore_.size() - 1] == '\n') + commentsBefore_.resize(commentsBefore_.size() - 1); currentValue().setComment(commentsBefore_, commentBefore); commentsBefore_ = ""; } @@ -337,14 +332,34 @@ bool Reader::readComment() { return true; } +static std::string normalizeEOL(Reader::Location begin, Reader::Location end) { + std::string normalized; + normalized.reserve(end - begin); + Reader::Location current = begin; + while (current != end) { + char c = *current++; + if (c == '\r') { + if (current != end && *current == '\n') + // convert dos EOL + ++current; + // convert Mac EOL + normalized += '\n'; + } else { + normalized += c; + } + } + return normalized; +} + void Reader::addComment(Location begin, Location end, CommentPlacement placement) { assert(collectComments_); + const std::string& normalized = normalizeEOL(begin, end); if (placement == commentAfterOnSameLine) { assert(lastValue_ != 0); - lastValue_->setComment(std::string(begin, end), placement); + lastValue_->setComment(normalized, placement); } else { - commentsBefore_ += std::string(begin, end); + commentsBefore_ += normalized; } } @@ -360,8 +375,15 @@ bool Reader::readCStyleComment() { bool Reader::readCppStyleComment() { while (current_ != end_) { Char c = getNextChar(); - if (c == '\r' || c == '\n') + if (c == '\n') break; + if (c == '\r') { + // Consume DOS EOL. It will be normalized in addComment. + if (current_ != end_ && *current_ == '\n') + getNextChar(); + // Break on Moc OS 9 EOL. + break; + } } return true; } diff --git a/src/lib_json/json_writer.cpp b/src/lib_json/json_writer.cpp index 5113c38..778661c 100644 --- a/src/lib_json/json_writer.cpp +++ b/src/lib_json/json_writer.cpp @@ -421,26 +421,27 @@ void StyledWriter::writeCommentBeforeValue(const Value& root) { document_ += "\n"; writeIndent(); - std::string normalizedComment = normalizeEOL(root.getComment(commentBefore)); - std::string::const_iterator iter = normalizedComment.begin(); - while (iter != normalizedComment.end()) { + const std::string& comment = root.getComment(commentBefore); + std::string::const_iterator iter = comment.begin(); + while (iter != comment.end()) { document_ += *iter; - if (*iter == '\n' && *(iter + 1) == '/') + if (*iter == '\n' && + (iter != comment.end() && *(iter + 1) == '/')) writeIndent(); ++iter; } - // Comments are stripped of newlines, so add one here + // Comments are stripped of trailing newlines, so add one here document_ += "\n"; } void StyledWriter::writeCommentAfterValueOnSameLine(const Value& root) { if (root.hasComment(commentAfterOnSameLine)) - document_ += " " + normalizeEOL(root.getComment(commentAfterOnSameLine)); + document_ += " " + root.getComment(commentAfterOnSameLine); if (root.hasComment(commentAfter)) { document_ += "\n"; - document_ += normalizeEOL(root.getComment(commentAfter)); + document_ += root.getComment(commentAfter); document_ += "\n"; } } @@ -451,25 +452,6 @@ bool StyledWriter::hasCommentForValue(const Value& value) { value.hasComment(commentAfter); } -std::string StyledWriter::normalizeEOL(const std::string& text) { - std::string normalized; - normalized.reserve(text.length()); - const char* begin = text.c_str(); - const char* end = begin + text.length(); - const char* current = begin; - while (current != end) { - char c = *current++; - if (c == '\r') // mac or dos EOL - { - if (*current == '\n') // convert dos EOL - ++current; - normalized += '\n'; - } else // handle unix EOL & other char - normalized += c; - } - return normalized; -} - // Class StyledStreamWriter // ////////////////////////////////////////////////////////////////// @@ -646,17 +628,17 @@ void StyledStreamWriter::unindent() { void StyledStreamWriter::writeCommentBeforeValue(const Value& root) { if (!root.hasComment(commentBefore)) return; - *document_ << normalizeEOL(root.getComment(commentBefore)); + *document_ << root.getComment(commentBefore); *document_ << "\n"; } void StyledStreamWriter::writeCommentAfterValueOnSameLine(const Value& root) { if (root.hasComment(commentAfterOnSameLine)) - *document_ << " " + normalizeEOL(root.getComment(commentAfterOnSameLine)); + *document_ << " " + root.getComment(commentAfterOnSameLine); if (root.hasComment(commentAfter)) { *document_ << "\n"; - *document_ << normalizeEOL(root.getComment(commentAfter)); + *document_ << root.getComment(commentAfter); *document_ << "\n"; } } @@ -667,25 +649,6 @@ bool StyledStreamWriter::hasCommentForValue(const Value& value) { value.hasComment(commentAfter); } -std::string StyledStreamWriter::normalizeEOL(const std::string& text) { - std::string normalized; - normalized.reserve(text.length()); - const char* begin = text.c_str(); - const char* end = begin + text.length(); - const char* current = begin; - while (current != end) { - char c = *current++; - if (c == '\r') // mac or dos EOL - { - if (*current == '\n') // convert dos EOL - ++current; - normalized += '\n'; - } else // handle unix EOL & other char - normalized += c; - } - return normalized; -} - std::ostream& operator<<(std::ostream& sout, const Value& root) { Json::StyledStreamWriter writer; writer.write(sout, root);