From b4e8e5abffa0d948e0cc44e90724d049e5bb8869 Mon Sep 17 00:00:00 2001 From: David Adam Date: Thu, 6 Jan 2022 23:37:21 +0800 Subject: [PATCH] __fish_print_apt_packages: stringify GNU tr is not Unicode-aware, and was corrupting descriptions that had non-ASCII characters. Additionally, rather than using the Unicode private use characters, use a single-byte ASCII control character (SUB, 0x1A) as the temporary record delimiter. The sed command could probably be rewritten to do all the heavy lifting here, but would be even less readable. Closes #8575. --- share/functions/__fish_print_apt_packages.fish | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/share/functions/__fish_print_apt_packages.fish b/share/functions/__fish_print_apt_packages.fish index a582c8c67..7e014aa50 100644 --- a/share/functions/__fish_print_apt_packages.fish +++ b/share/functions/__fish_print_apt_packages.fish @@ -12,20 +12,23 @@ function __fish_print_apt_packages # Do not generate the cache as apparently sometimes this is slow. # http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=547550 # (It is safe to use `sed -r` here as we are guaranteed to be on a GNU platform - # if apt-cache was found. Using unicode reserved range in `fish/tr` and the - # little-endian bytecode equivalent in `sed`. Supports localization.) + # if apt-cache was found.) + # Uses the ASCII SUB control character (0x1A) as a temporary record delimiter; the ASCII record separator proper is 0x1E. # # Note: This can include "Description:" fields which we need to include, # "Description-en_GB" (or another locale code) fields which we need to include # as well as "Description-md5" fields which we absolutely do *not* want to include # The regex doesn't allow numbers, so unless someone makes a hash algorithm without a number in the name, # we're safe. (yes, this should absolutely have a better format). 
- apt-cache --no-generate show '.*'(commandline -ct)'.*' 2>/dev/null | sed -r '/^(Package|Description-?[a-zA-Z_]*):/!d;s/Package: (.*)/\1\t/g;s/Description-?[^:]*: (.*)/\1\xee\x80\x80\x0a/g' | tr -d \n | tr -s \uE000 \n | uniq + # + # aptitude has options that control the output formatting, but is orders of magnitude slower + # + # sed could probably do all of the heavy lifting here, but would be even less readable + apt-cache --no-generate show '.*'(commandline -ct)'.*' 2>/dev/null | sed -r '/^(Package|Description-?[a-zA-Z_]*):/!d;s/Package: (.*)/\1\t/g;s/Description-?[^:]*: (.*)/\1\x1a\n/g' | string join "" | string replace --all --regex \x1a+ \n | uniq return 0 else set -l packages (dpkg --get-selections | string replace -fr '(\S+)\s+install' "\$1" | string match -e (commandline -ct)) - apt-cache --no-generate show $packages 2>/dev/null | sed -r '/^(Package|Description-?[a-zA-Z_]*):/!d;s/Package: (.*)/\1\t/g;s/Description-?[^:]*: (.*)/\1\xee\x80\x80\x0a/g' | tr -d \n | tr -s \uE000 \n | uniq - + apt-cache --no-generate show $packages 2>/dev/null | sed -r '/^(Package|Description-?[a-zA-Z_]*):/!d;s/Package: (.*)/\1\t/g;s/Description-?[^:]*: (.*)/\1\x1a\n/g' | string join "" | string replace --all --regex \x1a+ \n | uniq return 0 end end