Skip to content

Commit

Permalink
Recalculate position so it is based on the index of unicode codepoints.
Browse files Browse the repository at this point in the history
`cucumber-ruby` expects position values which are based on the index of
the codepoint instead of the index of the code unit. This change modifies
the value returned to `cucumber-ruby`.

Prior to this change, the RegexSubMatch's position, which was correct in
terms of a code unit array, would cause an `index out of string` error and
crash cucumber-ruby when pretty-printing the results of a test.

This commit also ammends the added tests to demonstrate the corrected
behavior.
  • Loading branch information
src committed Jul 1, 2019
1 parent 581b3c8 commit 02d74ed
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 18 deletions.
8 changes: 1 addition & 7 deletions features/specific/wire_encoding.feature
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,7 @@ Feature: Wire Encoding Feature
"""
When Cucumber runs the feature
Then the step output should contain:
# EXPECTED
# """
# カラオケ機ASCII
#
# """
# ACTUAL
"""
\\u00E3\\u0082\\u00AB\\u00E3\\u0083\\u00A9\\u00E3\\u0082\\u00AA\\u00E3\\u0082\\u00B1\\u00E6\\u00A9\\u009FASCII
カラオケ機ASCII
"""
1 change: 1 addition & 0 deletions include/cucumber-cpp/internal/Table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class CUCUMBER_CPP_EXPORT Table {
* @brief addColumn
* @param column
*
* @throws std::range_error
* @throws std::runtime_error
*/
void addColumn(const std::string column);
Expand Down
31 changes: 24 additions & 7 deletions src/Regex.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
#include <cucumber-cpp/internal/utils/Regex.hpp>
#include <boost/make_shared.hpp>

#include <algorithm>
#include <stdexcept>

namespace cucumber {
namespace internal {

Expand All @@ -24,18 +27,32 @@ boost::shared_ptr<RegexMatch> Regex::find(const std::string &expression) const {
return boost::make_shared<FindRegexMatch>(regexImpl, expression);
}

FindRegexMatch::FindRegexMatch(const boost::regex &regexImpl, const std::string &expression) {
namespace {
bool isUtf8CodeUnitStartOfCodepoint(unsigned int i) {
return (i & 0xc0) != 0x80;
}

std::ptrdiff_t utf8CodepointOffset(const std::string &expression,
boost::smatch::const_iterator &matchIterator) {
if (expression.size() < static_cast<size_t>(matchIterator->first - expression.begin())) {
throw std::range_error("first codepoint in match is out of range");
}
return count_if(expression.begin(), matchIterator->first, &isUtf8CodeUnitStartOfCodepoint);
}
} // namespace

FindRegexMatch::FindRegexMatch(const boost::regex& regexImpl, const std::string& expression) {
boost::smatch matchResults;
regexMatched = boost::regex_search(
expression, matchResults, regexImpl, boost::regex_constants::match_extra);
if (regexMatched) {
boost::smatch::const_iterator i = matchResults.begin();
if (i != matchResults.end())
boost::smatch::const_iterator matchIterator = matchResults.begin();
if (matchIterator != matchResults.end())
// Skip capture group 0 which is the whole match, not a user marked sub-expression
++i;
for (; i != matchResults.end(); ++i) {
if (i->matched) {
RegexSubmatch s = {*i, i->first - expression.begin()};
++matchIterator;
for (; matchIterator != matchResults.end(); ++matchIterator) {
if (matchIterator->matched) {
RegexSubmatch s = {*matchIterator, utf8CodepointOffset(expression, matchIterator)};
submatches.push_back(s);
} else {
submatches.push_back(RegexSubmatch());
Expand Down
5 changes: 1 addition & 4 deletions tests/unit/RegexTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,7 @@ TEST(RegexTest, findReportsCodepointPositions) {
EXPECT_TRUE(match->matches());
ASSERT_EQ(2, match->getSubmatches().size());
EXPECT_EQ(5, match->getSubmatches()[0].position);
// EXPECTED:
// EXPECT_EQ(18, match->getSubmatches()[1].position);
// ACTUAL
EXPECT_EQ(28, match->getSubmatches()[1].position);
EXPECT_EQ(18, match->getSubmatches()[1].position);
}

TEST(RegexTest, findAllExtractsTheFirstGroupOfEveryToken) {
Expand Down

0 comments on commit 02d74ed

Please sign in to comment.