From a45f8f03c954956506c1b2896e559b66d3518d5b Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Oct 2022 00:43:54 +0900 Subject: [PATCH 1/5] Match abbreviated day and month names with head of full names --- ext/date/date_strptime.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c index 7b06a31..fa1ed4c 100644 --- a/ext/date/date_strptime.c +++ b/ext/date/date_strptime.c @@ -10,16 +10,12 @@ static const char *day_names[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", - "Sun", "Mon", "Tue", "Wed", - "Thu", "Fri", "Sat" }; static const char *month_names[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; static const char *merid_names[] = { @@ -209,10 +205,12 @@ date__strptime_internal(const char *str, size_t slen, int i; for (i = 0; i < (int)sizeof_array(day_names); i++) { - size_t l = strlen(day_names[i]); - if (strncasecmp(day_names[i], &str[si], l) == 0) { + const char *day_name = day_names[i]; + size_t l = strlen(day_name); + if ((slen - si >= l && strncasecmp(day_name, &str[si], l) == 0) || + (slen - si >= (l = 3) && strncasecmp(day_name, &str[si], l) == 0)) { si += l; - set_hash("wday", INT2FIX(i % 7)); + set_hash("wday", INT2FIX(i)); goto matched; } } @@ -225,10 +223,12 @@ date__strptime_internal(const char *str, size_t slen, int i; for (i = 0; i < (int)sizeof_array(month_names); i++) { - size_t l = strlen(month_names[i]); - if (strncasecmp(month_names[i], &str[si], l) == 0) { + const char *month_name = month_names[i]; + size_t l = strlen(month_name); + if ((slen - si >= l && strncasecmp(month_name, &str[si], l) == 0) || + (slen - si >= (l = 3) && strncasecmp(month_name, &str[si], l) == 0)) { si += l; - set_hash("mon", INT2FIX((i % 12) + 1)); + set_hash("mon", INT2FIX(i + 1)); goto matched; } } From 7fe2bd5f94db38220374dd817e4c0151fad30096 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Oct 2022 01:09:38 +0900 Subject: [PATCH 2/5] Remove `merid_names` table --- ext/date/date_strptime.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c index fa1ed4c..53b48d5 100644 --- a/ext/date/date_strptime.c +++ b/ext/date/date_strptime.c @@ -18,11 +18,6 @@ static const char *month_names[] = { "October", "November", "December", }; -static const char *merid_names[] = { - "am", "pm", - "a.m.", "p.m." -}; - static const char *extz_pats[] = { ":z", "::z", @@ -402,18 +397,19 @@ date__strptime_internal(const char *str, size_t slen, case 'P': case 'p': + if (slen - si < 2) fail(); { - int i; - - for (i = 0; i < 4; i++) { - size_t l = strlen(merid_names[i]); - if (strncasecmp(merid_names[i], &str[si], l) == 0) { - si += l; - set_hash("_merid", INT2FIX((i % 2) == 0 ? 0 : 12)); - goto matched; - } + char c = str[si]; + const int hour = (c == 'P' || c == 'p') ? 12 : 0; + if (!hour && !(c == 'A' || c == 'a')) fail(); + if ((c = str[si+1]) == '.') { + if (slen - si < 4 || str[si+3] != '.') fail(); + c = str[si += 2]; } - fail(); + if (!(c == 'M' || c == 'm')) fail(); + si += 2; + set_hash("_merid", INT2FIX(hour)); + goto matched; } case 'Q': From 945e26e24393ebe11e89c679699983f26c5ffc8e Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Oct 2022 01:10:16 +0900 Subject: [PATCH 3/5] Remove `extz_pats` table --- ext/date/date_strptime.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c index 53b48d5..7d61347 100644 --- a/ext/date/date_strptime.c +++ b/ext/date/date_strptime.c @@ -18,12 +18,6 @@ static const char *month_names[] = { "October", "November", "December", }; -static const char *extz_pats[] = { - ":z", - "::z", - ":::z" -}; - #define sizeof_array(o) (sizeof o / sizeof o[0]) #define f_negate(x) rb_funcall(x, rb_intern("-@"), 0) @@ -185,12 +179,11 @@ date__strptime_internal(const char *str, size_t slen, { int i; - for (i = 0; i < (int)sizeof_array(extz_pats); i++) - if (strncmp(extz_pats[i], &fmt[fi], - strlen(extz_pats[i])) == 0) { - fi += i; - goto again; - } + for (i = 1; i < 3 && fi + i < flen && fmt[fi+i] == ':'; ++i); + if (fmt[fi+i] == 'z') { + fi += i - 1; + goto again; + } fail(); } From 3bfed83ce7efeb78042caaff838c0124b3605b63 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 30 Oct 2022 01:11:32 +0900 Subject: [PATCH 4/5] Consider the length of string to parse --- ext/date/date_strptime.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c index 7d61347..72d6b8a 100644 --- a/ext/date/date_strptime.c +++ b/ext/date/date_strptime.c @@ -60,7 +60,7 @@ num_pattern_p(const char *s) #define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1]) static long -read_digits(const char *s, VALUE *n, size_t width) +read_digits(const char *s, size_t slen, VALUE *n, size_t width) { size_t l; @@ -68,7 +68,7 @@ read_digits(const char *s, VALUE *n, size_t width) return 0; l = 0; - while (ISDIGIT(s[l])) { + while (l < slen && ISDIGIT(s[l])) { if (++l == width) break; } @@ -116,7 +116,7 @@ do { \ #define READ_DIGITS(n,w) \ do { \ size_t l; \ - l = read_digits(&str[si], &n, w); \ + l = read_digits(&str[si], slen - si, &n, w); \ if (l == 0) \ fail(); \ si += l; \ @@ -156,6 +156,14 @@ date__strptime_internal(const char *str, size_t slen, si = fi = 0; while (fi < flen) { + if (isspace((unsigned char)fmt[fi])) { + while (si < slen && isspace((unsigned char)str[si])) + si++; + while (++fi < flen && isspace((unsigned char)fmt[fi])); + continue; + } + + if (si >= slen) fail(); switch (fmt[fi]) { case '%': @@ -576,7 +584,7 @@ date__strptime_internal(const char *str, size_t slen, b = rb_backref_get(); rb_match_busy(b); - m = f_match(pat, rb_usascii_str_new2(&str[si])); + m = f_match(pat, rb_usascii_str_new(&str[si], slen - si)); if (!NIL_P(m)) { VALUE s, l, o; @@ -608,22 +616,13 @@ date__strptime_internal(const char *str, size_t slen, if (str[si] != '%') fail(); si++; - if (fi < flen) - if (str[si] != fmt[fi]) + if (fi < flen) { + if (si >= slen || str[si] != fmt[fi]) fail(); - si++; + si++; + } goto matched; } - case ' ': - case '\t': - case '\n': - case '\v': - case '\f': - case '\r': - while (isspace((unsigned char)str[si])) - si++; - fi++; - break; default: ordinal: if (str[si] != fmt[fi]) From 3f666fa882291e6cc39b4e4d1addaad6a64a08d7 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 18 Dec 2022 11:38:07 +0900 Subject: [PATCH 5/5] Extract `head_match_p` and abbreviated name length constants --- ext/date/date_strptime.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/ext/date/date_strptime.c b/ext/date/date_strptime.c index 72d6b8a..f731629 100644 --- a/ext/date/date_strptime.c +++ b/ext/date/date_strptime.c @@ -11,12 +11,14 @@ static const char *day_names[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", }; +static const int ABBREVIATED_DAY_NAME_LENGTH = 3; static const char *month_names[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", }; +static const int ABBREVIATED_MONTH_NAME_LENGTH = 3; #define sizeof_array(o) (sizeof o / sizeof o[0]) @@ -146,6 +148,12 @@ do { \ VALUE date_zone_to_diff(VALUE); +static inline int +head_match_p(size_t len, const char *name, const char *str, size_t slen, size_t si) +{ + return slen - si >= len && strncasecmp(name, &str[si], len) == 0; +} + static size_t date__strptime_internal(const char *str, size_t slen, const char *fmt, size_t flen, VALUE hash) @@ -153,6 +161,7 @@ date__strptime_internal(const char *str, size_t slen, size_t si, fi; int c; +#define HEAD_MATCH_P(len, name) head_match_p(len, name, str, slen, si) si = fi = 0; while (fi < flen) { @@ -203,8 +212,8 @@ date__strptime_internal(const char *str, size_t slen, for (i = 0; i < (int)sizeof_array(day_names); i++) { const char *day_name = day_names[i]; size_t l = strlen(day_name); - if ((slen - si >= l && strncasecmp(day_name, &str[si], l) == 0) || - (slen - si >= (l = 3) && strncasecmp(day_name, &str[si], l) == 0)) { + if (HEAD_MATCH_P(l, day_name) || + HEAD_MATCH_P(l = ABBREVIATED_DAY_NAME_LENGTH, day_name)) { si += l; set_hash("wday", INT2FIX(i)); goto matched; @@ -221,8 +230,8 @@ date__strptime_internal(const char *str, size_t slen, for (i = 0; i < (int)sizeof_array(month_names); i++) { const char *month_name = month_names[i]; size_t l = strlen(month_name); - if ((slen - si >= l && strncasecmp(month_name, &str[si], l) == 0) || - (slen - si >= (l = 3) && strncasecmp(month_name, &str[si], l) == 0)) { + if (HEAD_MATCH_P(l, month_name) || + HEAD_MATCH_P(l = ABBREVIATED_MONTH_NAME_LENGTH, month_name)) { si += l; set_hash("mon", INT2FIX(i + 1)); goto matched;