// decodeSubject decodes a subject line made of RFC 2047 encoded words that use
// the UTF-8 charset and the "Q" (quoted-printable style) encoding. This format
// is the result of a `git format-patch` when the commit title has a non-ASCII
// character (e.g. an emoji).
//
// Subjects that do not start with such an encoded word are returned unchanged.
// Text outside encoded words is copied through verbatim. If any encoded word
// fails to decode, decoding is aborted and the original subject is returned.
//
// See for reference: https://stackoverflow.com/questions/27695749/gmail-api-not-respecting-utf-encoding-in-subject
func decodeSubject(encoded string) string {
	const (
		wordPrefix = "=?UTF-8?q?"
		wordSuffix = "?="
	)

	// RFC 2047 treats the charset and encoding names as case-insensitive, so
	// "=?utf-8?Q?" must be recognized as well as "=?UTF-8?q?".
	startsWord := func(s string) bool {
		return len(s) >= len(wordPrefix) && strings.EqualFold(s[:len(wordPrefix)], wordPrefix)
	}

	if !startsWord(encoded) {
		// not UTF-8 encoded
		return encoded
	}

	// If the subject is too long, `git format-patch` may produce a subject line across
	// multiple lines. When parsed, this can look like the following:
	//
	//	=?UTF-8?q?first-line?= =?UTF-8?q?next-line?=
	//
	// so the subject is consumed one encoded word at a time.
	var subject strings.Builder
	rest := encoded
	for rest != "" {
		if !startsWord(rest) {
			// Plain text: copy it through untouched up to the next candidate
			// encoded word. Searching from the second byte guarantees progress
			// when rest begins with an "=?" that is not a UTF-8 Q word.
			next := strings.Index(rest[1:], "=?")
			if next < 0 {
				subject.WriteString(rest)
				break
			}
			subject.WriteString(rest[:next+1])
			rest = rest[next+1:]
			continue
		}

		// Split off the encoded text. An unterminated word is decoded to the
		// end of the subject rather than rejected.
		word := rest[len(wordPrefix):]
		rest = ""
		if end := strings.Index(word, wordSuffix); end >= 0 {
			word, rest = word[:end], word[end+len(wordSuffix):]
		}

		// In the Q encoding "_" stands for a space. Rewrite it as "=20" rather
		// than a raw space so the quoted-printable reader cannot discard it as
		// trailing whitespace at the end of the word.
		payload := strings.ReplaceAll(word, "_", "=20")
		text, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(payload)))
		if err != nil {
			// if err, abort decoding and return original subject
			return encoded
		}
		subject.Write(text)

		// Whitespace that only separates two adjacent encoded words is not
		// part of the subject text.
		if next := strings.TrimLeft(rest, " \t"); startsWord(next) {
			rest = next
		}
	}

	return subject.String()
}
+`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiOneLineTitle, + Body: expectedBody, + }, + }, + "mailboxEmojiMultiLine": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] =?UTF-8?q?[IA64]=20Put=20ia64=20config=20files=20on=20the=20?= + =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig=20diet?= + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: expectedEmojiMultiLineTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny