Skip to content

Commit

Permalink
parser: skip BOM at the beginning of input
Browse files Browse the repository at this point in the history
... so that the data format is properly recognized even when the file starts
with a BOM (byte order mark).

Closes: #182
  • Loading branch information
kdudka committed May 21, 2024
1 parent 43ae09a commit fd86997
Show file tree
Hide file tree
Showing 7 changed files with 10,877 additions and 7 deletions.
26 changes: 21 additions & 5 deletions src/lib/instream.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,21 +57,37 @@ void InStream::handleError(const std::string &msg, const unsigned long line)
InStreamLookAhead::InStreamLookAhead(
InStream &input,
const unsigned size,
const bool skipBOM,
bool skipWhiteSpaces)
{
std::istream &inStr = input.str();

// read `size` chars from input
while (buf_.size() < size) {
const int c = inStr.get();
if (skipWhiteSpaces && isspace(c) && !!inStr)
int c = inStr.get();
if (skipBOM
// try to read BOM ... [0xEF, 0xBB, 0xBF]
&& (0xEF == c)
&& (0xBB == (c = inStr.get()))
&& (0xBF == (c = inStr.get())))
// BOM successfully read -> read the next char
c = inStr.get();

// read chars from input
for (;;) {
if (skipWhiteSpaces && isspace(c))
// skip a white-space
continue;
goto next;

// only the leading white-spaces are skipped
skipWhiteSpaces = false;

// append one char to the buffer
buf_.push_back(c);
if (size <= buf_.size())
// the requested number of chars have been read
break;
next:
// read the next char
c = inStr.get();
}

// put the chars back to the input stream
Expand Down
3 changes: 2 additions & 1 deletion src/lib/instream.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ class InStreamLookAhead {
InStreamLookAhead(
InStream &input,
unsigned size,
bool skipWhiteSpaces = false);
bool skipBOM,
bool skipWhiteSpaces);

char operator[](const unsigned idx) const {
return buf_.at(idx);
Expand Down
4 changes: 3 additions & 1 deletion src/lib/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ static inline std::unique_ptr<T> make_unique(InStream &input) {
AbstractParserPtr createParser(InStream &input)
{
// skip all white-spaces and sniff the first two chars from the input
InStreamLookAhead head(input, 2U, /* skipWhiteSpaces */ true);
InStreamLookAhead head(input, 2U,
/* skipBOM */ true,
/* skipWhiteSpaces */ true);

switch (head[0]) {
case '{':
Expand Down
1 change: 1 addition & 0 deletions tests/csgrep/0125-sarif-parser-bom-args.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--mode=json
10,604 changes: 10,604 additions & 0 deletions tests/csgrep/0125-sarif-parser-bom-stdin.txt

Large diffs are not rendered by default.

245 changes: 245 additions & 0 deletions tests/csgrep/0125-sarif-parser-bom-stdout.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
{
"scan": {
"analyzer-version-snyk-code": "1.0.0"
},
"defects": [
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/lib/krb5/krb/t_princ.c",
"line": 381,
"column": 26,
"h_size": 22,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
"line": 140,
"column": 21,
"h_size": 12,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 290,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/wconfig.c",
"line": 145,
"column": 21,
"h_size": 13,
"event": "note[cpp/WeakGuard]",
"message": "An hardcoded domain name is compared in strcmp. This check could lead to a bypass since the domain name can be spoofed or controlled by an attacker.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/sasl2-sys/sasl2/utils/smtptest.c",
"line": 360,
"column": 3,
"h_size": 4,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 161,
"column": 19,
"h_size": 7,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 161,
"column": 30,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
"line": 307,
"column": 35,
"h_size": 7,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/rdkafka-sys/librdkafka/tests/0098-consumer-txn.cpp",
"line": 444,
"column": 7,
"h_size": 29,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into an addition operator (+), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/lz4-sys/liblz4/tests/abiTest.c",
"line": 81,
"column": 27,
"h_size": 26,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 126,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 128,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/krb5-src/krb5/src/tests/asn.1/trval.c",
"line": 130,
"column": 17,
"h_size": 2,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a file flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/openssl-src/openssl/test/confdump.c",
"line": 37,
"column": 25,
"h_size": 10,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
},
{
"checker": "SNYK_CODE_WARNING",
"cwe": 190,
"tool": "snyk-code",
"key_event_idx": 0,
"events": [
{
"file_name": "vendor/lz4-sys/liblz4/tests/fuzzer.c",
"line": 378,
"column": 95,
"h_size": 9,
"event": "note[cpp/IntegerOverflow/test]",
"message": "Unsanitized input from a command line argument flows into a subtraction operator (-), where it is used in integer arithmetic. This may result in an integer overflow vulnerability.",
"verbosity_level": 0
}
]
}
]
}
1 change: 1 addition & 0 deletions tests/csgrep/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,3 +168,4 @@ test_csgrep("0121-cov-parser-lock-evasion" )
test_csgrep("0122-json-parser-cov-v10-column" )
test_csgrep("0123-csgrep-hash-v1" )
test_csgrep("0124-sarif-writer-imp" )
test_csgrep("0125-sarif-parser-bom" )

0 comments on commit fd86997

Please sign in to comment.