From 0804dc4de21a5891097239730245abbed282ad2d Mon Sep 17 00:00:00 2001 From: Eduard Sukharev Date: Sun, 19 Mar 2023 12:48:15 +0300 Subject: [PATCH 1/6] Fix selecting file for print with M23 by LFN When Marlin scans VFAT sequences to compose an LFN to be compared with the provided one, the actual LFN is UTF-16LE, while the provided filename is supposed to be UTF-8. This commit extracts the existing conversion logic and reuses it when searching for a file by its LFN. --- Marlin/src/sd/SdBaseFile.cpp | 70 ++++++++++++++++++++---------------- Marlin/src/sd/SdBaseFile.h | 6 +++- 2 files changed, 45 insertions(+), 31 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index b8b49b7ba0f8..a420b95ca418 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -711,6 +711,7 @@ bool SdBaseFile::open(SdBaseFile *dirFile, const uint8_t dname[11] if (lfnChecksum == pvFat->checksum) { // Set chunk of LFN from VFAT entry into lfnName getLFNName(pvFat, (char *)lfnName, lfnSequenceNumber); + convertUtf16ToUtf8((char *)lfnName); // LFN found? if (!strncasecmp((char*)dlname, (char*)lfnName, lfnNameLength)) lfnFileFound = true; } @@ -1506,44 +1507,53 @@ int8_t SdBaseFile::readDir(dir_t *dir, char *longFilename) { // Post-process normal file or subdirectory longname, if any if (DIR_IS_FILE_OR_SUBDIR(dir)) { #if ENABLED(UTF_FILENAME_SUPPORT) - #if LONG_FILENAME_CHARSIZE > 2 - // Add warning for developers for unsupported 3-byte cases. - // (Converting 2-byte codepoints to 3-byte in-place would break the rest of filename.) - #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." - #endif - // Is there a long filename to decode? 
if (longFilename) { - // Reset n to the start of the long name - n = 0; - for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH); idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding - const uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' - longFilename[n++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char - longFilename[n++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char - longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); - #else // Encode as '_' - longFilename[n++] = '_'; - #endif - } - if (0 == utf16_ch) break; // End of filename - } // idx - } // longFilename + n = convertUtf16ToUtf8(longFilename); + } #endif return n; } // DIR_IS_FILE_OR_SUBDIR } } +#if ENABLED(UTF_FILENAME_SUPPORT) + uint8_t SdBaseFile::convertUtf16ToUtf8(char *longFilename) { + #if LONG_FILENAME_CHARSIZE > 2 + // Add warning for developers for unsupported 3-byte cases. + // (Converting 2-byte codepoints to 3-byte in-place would break the rest of filename.) + #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." 
+ #endif + + int16_t n; + // Reset n to the start of the long name + n = 0; + for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH); idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding + const uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + longFilename[n++] = '_'; + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char + longFilename[n++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char + longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[n++] = '_'; + #endif + } + if (0 == utf16_ch) break; // End of filename + } // idx + + return n; + } +#endif // UTF_FILENAME_SUPPORT + // Read next directory entry into the cache // Assumes file is correctly positioned dir_t* SdBaseFile::readDirCache() { diff --git a/Marlin/src/sd/SdBaseFile.h b/Marlin/src/sd/SdBaseFile.h index dd8e2aff4bd4..9baa71e9336e 100644 --- a/Marlin/src/sd/SdBaseFile.h +++ b/Marlin/src/sd/SdBaseFile.h @@ -392,6 +392,10 @@ class SdBaseFile { bool openCachedEntry(uint8_t cacheIndex, uint8_t oflags); dir_t* readDirCache(); + #if ENABLED(UTF_FILENAME_SUPPORT) + uint8_t convertUtf16ToUtf8(char *longFilename); + #endif // UTF_FILENAME_SUPPORT + // Long Filename create/write support #if ENABLED(LONG_FILENAME_WRITE_SUPPORT) static bool isDirLFN(const dir_t* dir); @@ -403,5 +407,5 @@ class SdBaseFile { static inline uint8_t getLFNEntriesNum(const char *lname) { return (strlen(lname) + 12) / 13; } static void getLFNName(vfat_t *vFatDir, char *lname, uint8_t startOffset); static void setLFNName(vfat_t *vFatDir, char *lname, 
uint8_t lfnSequenceNumber); - #endif + #endif // LONG_FILENAME_WRITE_SUPPORT }; From 1f9c4b9d2b71d2047235abac440acacd243f142b Mon Sep 17 00:00:00 2001 From: Eduard Sukharev Date: Sun, 19 Mar 2023 17:56:33 +0300 Subject: [PATCH 2/6] Fix utf8 conversion call only when UTF_FILENAME_SUPPORT enabled --- Marlin/src/sd/SdBaseFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index a420b95ca418..ea18f0bc8353 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -711,7 +711,9 @@ bool SdBaseFile::open(SdBaseFile *dirFile, const uint8_t dname[11] if (lfnChecksum == pvFat->checksum) { // Set chunk of LFN from VFAT entry into lfnName getLFNName(pvFat, (char *)lfnName, lfnSequenceNumber); - convertUtf16ToUtf8((char *)lfnName); + #if ENABLED(UTF_FILENAME_SUPPORT) + convertUtf16ToUtf8((char *)lfnName); + #endif // LFN found? if (!strncasecmp((char*)dlname, (char*)lfnName, lfnNameLength)) lfnFileFound = true; } From 2ec1ccc25c7ad239068a6ec9b729a4597fee4617 Mon Sep 17 00:00:00 2001 From: Eduard Sukharev Date: Mon, 20 Mar 2023 09:14:44 +0300 Subject: [PATCH 3/6] Fix LFN lookup - reset tmp LFN when resetting LFN name checksum --- Marlin/src/sd/SdBaseFile.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index ea18f0bc8353..870c72ccee86 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -703,7 +703,10 @@ bool SdBaseFile::open(SdBaseFile *dirFile, const uint8_t dname[11] // Get VFat dir entry pvFat = (vfat_t *) p; // Get checksum from the last entry of the sequence - if (pvFat->sequenceNumber & 0x40) lfnChecksum = pvFat->checksum; + if (pvFat->sequenceNumber & 0x40) { + lfnChecksum = pvFat->checksum; + memset(lfnName, '\0', sizeof(lfnName)); + } // Get LFN sequence number lfnSequenceNumber = pvFat->sequenceNumber & 0x1F; if WITHIN(lfnSequenceNumber, 1, 
reqEntriesNum) { From 8dd95fa95459642016b96209fcfcccfd310d5beb Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Thu, 23 Mar 2023 15:34:26 -0500 Subject: [PATCH 4/6] cleanup --- Marlin/src/sd/SdBaseFile.cpp | 66 ++++++++++++++++++------------------ Marlin/src/sd/SdBaseFile.h | 8 ++--- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index 870c72ccee86..92ba0714cd1c 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -714,9 +714,7 @@ bool SdBaseFile::open(SdBaseFile *dirFile, const uint8_t dname[11] if (lfnChecksum == pvFat->checksum) { // Set chunk of LFN from VFAT entry into lfnName getLFNName(pvFat, (char *)lfnName, lfnSequenceNumber); - #if ENABLED(UTF_FILENAME_SUPPORT) - convertUtf16ToUtf8((char *)lfnName); - #endif + TERN_(UTF_FILENAME_SUPPORT, convertUtf16ToUtf8((char *)lfnName)); // LFN found? if (!strncasecmp((char*)dlname, (char*)lfnName, lfnNameLength)) lfnFileFound = true; } @@ -1523,40 +1521,42 @@ int8_t SdBaseFile::readDir(dir_t *dir, char *longFilename) { } #if ENABLED(UTF_FILENAME_SUPPORT) - uint8_t SdBaseFile::convertUtf16ToUtf8(char *longFilename) { - #if LONG_FILENAME_CHARSIZE > 2 - // Add warning for developers for unsupported 3-byte cases. - // (Converting 2-byte codepoints to 3-byte in-place would break the rest of filename.) - #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." 
- #endif - int16_t n; - // Reset n to the start of the long name - n = 0; - for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH); idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding - const uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + uint8_t SdBaseFile::convertUtf16ToUtf8(char *longFilename) { + #if LONG_FILENAME_CHARSIZE > 2 + // Add warning for developers for unsupported 3-byte cases. + // (Converting 2-byte codepoints to 3-byte in-place would break the rest of filename.) + #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." + #endif + + int16_t n; + // Reset n to the start of the long name + n = 0; + for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH); idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding + const uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + longFilename[n++] = '_'; + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char + longFilename[n++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char + longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); + #else // Encode as '_' longFilename[n++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char - longFilename[n++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 
2 // Encode as 3-byte UTF-8 char - longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); - #else // Encode as '_' - longFilename[n++] = '_'; - #endif - } - if (0 == utf16_ch) break; // End of filename - } // idx + #endif + } + if (0 == utf16_ch) break; // End of filename + } // idx - return n; + return n; } + #endif // UTF_FILENAME_SUPPORT // Read next directory entry into the cache diff --git a/Marlin/src/sd/SdBaseFile.h b/Marlin/src/sd/SdBaseFile.h index 9baa71e9336e..190305a367f2 100644 --- a/Marlin/src/sd/SdBaseFile.h +++ b/Marlin/src/sd/SdBaseFile.h @@ -394,18 +394,16 @@ class SdBaseFile { #if ENABLED(UTF_FILENAME_SUPPORT) uint8_t convertUtf16ToUtf8(char *longFilename); - #endif // UTF_FILENAME_SUPPORT + #endif // Long Filename create/write support #if ENABLED(LONG_FILENAME_WRITE_SUPPORT) static bool isDirLFN(const dir_t* dir); static bool isDirNameLFN(const char *dirname); static bool parsePath(const char *str, uint8_t *name, uint8_t *lname, const char **ptr); - /** - * Return the number of entries needed in the FAT for this LFN - */ + // Return the number of entries needed in the FAT for this LFN static inline uint8_t getLFNEntriesNum(const char *lname) { return (strlen(lname) + 12) / 13; } static void getLFNName(vfat_t *vFatDir, char *lname, uint8_t startOffset); static void setLFNName(vfat_t *vFatDir, char *lname, uint8_t lfnSequenceNumber); - #endif // LONG_FILENAME_WRITE_SUPPORT + #endif }; From 90cfb2a1b48af244f27476bb133d4d40b20babdf Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Thu, 23 Mar 2023 15:36:17 -0500 Subject: [PATCH 5/6] const --- Marlin/src/sd/SdBaseFile.cpp | 4 ++-- Marlin/src/sd/SdBaseFile.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index 92ba0714cd1c..7000b03b478f 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ 
-1428,7 +1428,7 @@ int16_t SdBaseFile::read(void *buf, uint16_t nbyte) { * readDir() called before a directory has been opened, this is not * a directory file or an I/O error occurred. */ -int8_t SdBaseFile::readDir(dir_t *dir, char *longFilename) { +int8_t SdBaseFile::readDir(dir_t *dir, char * const longFilename) { int16_t n; // if not a directory file or miss-positioned return an error if (!isDir() || (0x1F & curPosition_)) return -1; @@ -1522,7 +1522,7 @@ int8_t SdBaseFile::readDir(dir_t *dir, char *longFilename) { #if ENABLED(UTF_FILENAME_SUPPORT) - uint8_t SdBaseFile::convertUtf16ToUtf8(char *longFilename) { + uint8_t SdBaseFile::convertUtf16ToUtf8(char * const longFilename) { #if LONG_FILENAME_CHARSIZE > 2 // Add warning for developers for unsupported 3-byte cases. // (Converting 2-byte codepoints to 3-byte in-place would break the rest of filename.) diff --git a/Marlin/src/sd/SdBaseFile.h b/Marlin/src/sd/SdBaseFile.h index 190305a367f2..19225170f9ce 100644 --- a/Marlin/src/sd/SdBaseFile.h +++ b/Marlin/src/sd/SdBaseFile.h @@ -298,7 +298,7 @@ class SdBaseFile { bool printName(); int16_t read(); int16_t read(void *buf, uint16_t nbyte); - int8_t readDir(dir_t *dir, char *longFilename); + int8_t readDir(dir_t *dir, char * const longFilename); static bool remove(SdBaseFile *dirFile, const char *path); bool remove(); @@ -393,7 +393,7 @@ class SdBaseFile { dir_t* readDirCache(); #if ENABLED(UTF_FILENAME_SUPPORT) - uint8_t convertUtf16ToUtf8(char *longFilename); + uint8_t convertUtf16ToUtf8(char * const longFilename); #endif // Long Filename create/write support From 13719502759693d9a74105d914a56ff08cd72cca Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Sun, 26 Mar 2023 04:26:06 -0500 Subject: [PATCH 6/6] tweaks --- Marlin/src/sd/SdBaseFile.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index 7000b03b478f..72cdcb926829 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ 
b/Marlin/src/sd/SdBaseFile.cpp @@ -705,7 +705,7 @@ bool SdBaseFile::open(SdBaseFile *dirFile, const uint8_t dname[11] // Get checksum from the last entry of the sequence if (pvFat->sequenceNumber & 0x40) { lfnChecksum = pvFat->checksum; - memset(lfnName, '\0', sizeof(lfnName)); + ZERO(lfnName); } // Get LFN sequence number lfnSequenceNumber = pvFat->sequenceNumber & 0x1F; @@ -1136,13 +1136,13 @@ bool SdBaseFile::openNext(SdBaseFile *dirFile, uint8_t oflag) { * Get the LFN filename block from a dir. Get the block in lname at startOffset */ void SdBaseFile::getLFNName(vfat_t *pFatDir, char *lname, uint8_t sequenceNumber) { - uint8_t startOffset = (sequenceNumber - 1) * FILENAME_LENGTH; + const uint8_t startOffset = (sequenceNumber - 1) * FILENAME_LENGTH; LOOP_L_N(i, FILENAME_LENGTH) { const uint16_t utf16_ch = (i >= 11) ? pFatDir->name3[i - 11] : (i >= 5) ? pFatDir->name2[i - 5] : pFatDir->name1[i]; #if ENABLED(UTF_FILENAME_SUPPORT) // We can't reconvert to UTF-8 here as UTF-8 is variable-size encoding, but joining LFN blocks // needs static bytes addressing. So here just store full UTF-16LE words to re-convert later. - uint16_t idx = (startOffset + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding + const uint16_t idx = (startOffset + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding lname[idx] = utf16_ch & 0xFF; lname[idx + 1] = (utf16_ch >> 8) & 0xFF; #else @@ -1156,8 +1156,8 @@ bool SdBaseFile::openNext(SdBaseFile *dirFile, uint8_t oflag) { * Set the LFN filename block lname to a dir. Put the block based on sequence number */ void SdBaseFile::setLFNName(vfat_t *pFatDir, char *lname, uint8_t sequenceNumber) { - uint8_t startOffset = (sequenceNumber - 1) * FILENAME_LENGTH; - uint8_t nameLength = strlen(lname); + const uint8_t startOffset = (sequenceNumber - 1) * FILENAME_LENGTH, + nameLength = strlen(lname); LOOP_L_N(i, FILENAME_LENGTH) { uint16_t ch = 0; if ((startOffset + i) < nameLength)