From f18d9a529834b4955ef8f788e05107db810a8676 Mon Sep 17 00:00:00 2001 From: Siyuan Chen Date: Thu, 9 May 2024 17:16:04 +0800 Subject: [PATCH] Fix the issue where rg.el couldn't search Unicode. rg.el is unable to search for Unicode characters on Windows because NTEmacs currently limits non-ASCII file arguments to the current codepage. See https://github.com/emacs-mirror/emacs/blob/58a7b99823c5c42161e9acf2abf6c22afd4da4cd/src/w32.c#L1648. This patch provides a workaround: instead of passing Unicode arguments to ripgrep directly from Emacs, it passes them via a temporary .bat script, which is regenerated on every call to `rg-build-command`. This allows rg.el to search across all Unicode planes, rather than being restricted to a specific codepage. P.S. This feature is disabled by default to preserve the old behavior; enable it by setting `rg-w32-unicode` to t. --- rg.el | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/rg.el b/rg.el index 5108a9d..e754380 100644 --- a/rg.el +++ b/rg.el @@ -144,6 +144,19 @@ Disabling this setting can break functionality of this package." :type 'boolean :group 'rg) +(defcustom rg-w32-unicode nil + "Enable Unicode support on Windows. +A workaround for NTEmacs subprocess not supporting Unicode arguments." + :type 'boolean + :group 'rg) + +(defcustom rg-w32-ripgrep-proxy + (expand-file-name "rg-w32-ripgrep-proxy.bat" user-emacs-directory) + "An automatically generated temporary batch file. +Used to proxy ripgrep Unicode arguments." + :type 'string + :group 'rg) + ;;;###autoload (defvar rg-command-line-flags-function 'identity "Function to modify command line flags of a search. @@ -297,10 +310,19 @@ are command line flags to use for the search." 
(when (member system-type '(darwin windows-nt)) (list "."))))) - (grep-expand-template - (mapconcat 'identity (cons (rg-executable) (delete-dups command-line)) " ") - pattern - (if (rg-is-custom-file-pattern files) "custom" files)))) + (let ((command (grep-expand-template + (mapconcat 'identity (cons (rg-executable) (delete-dups command-line)) " ") + pattern + (if (rg-is-custom-file-pattern files) "custom" files)))) + (cond ((and (eq system-type 'windows-nt) rg-w32-unicode) + (with-temp-file rg-w32-ripgrep-proxy + (set-buffer-multibyte t) + (setq buffer-file-coding-system 'utf-8-dos) + (insert (format "@echo off\n")) + (insert (format "chcp 65001 > nul\n")) + (insert (format "%s\n" command))) + rg-w32-ripgrep-proxy) + (t command))))) (defun rg-invoke-rg-type-list () "Invokes rg --type-list and return the result."