Port the rest of wutil

2024-11-25 17:55:50 +08:00 · 2023-04-10 09:18:02 +02:00 · 2023-04-10 09:18:02 +02:00 · f53aa6f2e3
commit f53aa6f2e3
parent d3a7e3ffd9
2 changed files with 680 additions and 41 deletions
--- a/fish-rust/src/wcstringutil.rs
+++ b/fish-rust/src/wcstringutil.rs
@ -58,6 +58,14 @@ fn wcs2string_bad_char(c: char) {
    );
 }

+/// Splits `val` at every occurrence of `sep`, returning the pieces as owned strings.
+///
+/// The separators themselves are dropped. Adjacent separators, as well as a leading or trailing
+/// separator, produce empty pieces; an empty input yields a single empty piece.
+pub fn split_string(val: &wstr, sep: char) -> Vec<WString> {
+    let mut pieces = Vec::new();
+    for piece in val.as_char_slice().split(|&ch| ch == sep) {
+        pieces.push(WString::from_chars(piece));
+    }
+    pieces
+}
+
 /// Joins strings with a separator.
 pub fn join_strings<S: AsRef<wstr>>(strs: &[S], sep: char) -> WString {
    if strs.is_empty() {
--- a/fish-rust/src/wutil/mod.rs
+++ b/fish-rust/src/wutil/mod.rs
@ -5,19 +5,68 @@ pub mod printf;
 pub mod wcstod;
 pub mod wcstoi;

-use crate::common::fish_reserved_codepoint;
-use crate::common::{str2wcstring, wcs2zstring};
+use crate::common::{
+    cstr2wcstring, fish_reserved_codepoint, str2wcstring, wcs2osstring, wcs2string, wcs2zstring,
+};
+use crate::fallback;
+use crate::fds::AutoCloseFd;
+use crate::flog::FLOGF;
 use crate::wchar::{wstr, WString, L};
-use crate::wcstringutil::join_strings;
+use crate::wcstringutil::{join_strings, split_string, wcs2string_callback};
 pub(crate) use gettext::{wgettext, wgettext_fmt};
+use libc::{
+    DT_BLK, DT_CHR, DT_DIR, DT_FIFO, DT_LNK, DT_REG, DT_SOCK, EACCES, EIO, ELOOP, ENAMETOOLONG,
+    ENODEV, ENOENT, ENOTDIR, S_IFBLK, S_IFCHR, S_IFDIR, S_IFIFO, S_IFLNK, S_IFMT, S_IFREG,
+    S_IFSOCK,
+};
 pub(crate) use printf::sprintf;
 use std::ffi::OsStr;
+use std::fs;
 use std::fs::canonicalize;
+use std::io::Write;
+use std::os::fd::RawFd;
+use std::os::fd::{FromRawFd, IntoRawFd};
 use std::os::unix::prelude::{OsStrExt, OsStringExt};
 pub use wcstoi::*;
+use widestring_suffix::widestrs;
+
+/// Wide character version of opendir(): opens the directory named by `name` and returns the raw
+/// `DIR*` exactly as opendir() produced it. POSIX guarantees that opendir() sets close-on-exec.
+pub fn wopendir(name: &wstr) -> *mut libc::DIR {
+    let narrow_name = wcs2zstring(name);
+    // SAFETY: the pointer is taken from `narrow_name`, which stays alive across the call.
+    unsafe { libc::opendir(narrow_name.as_ptr()) }
+}
+
+/// Wide character version of stat(). Returns the metadata for `file_name`, or `None` if it could
+/// not be obtained (the underlying error is discarded).
+pub fn wstat(file_name: &wstr) -> Option<fs::Metadata> {
+    fs::metadata(wcs2osstring(file_name)).ok()
+}
+
+/// Wide character version of lstat(). Returns the metadata for `file_name` itself, without
+/// following a final symlink, or `None` if it could not be obtained (the error is discarded).
+pub fn lwstat(file_name: &wstr) -> Option<fs::Metadata> {
+    fs::symlink_metadata(wcs2osstring(file_name)).ok()
+}
+
+/// Wide character version of access(). `mode` is handed to access() unchanged and the raw return
+/// value of access() is passed back to the caller.
+pub fn waccess(file_name: &wstr, mode: libc::c_int) -> libc::c_int {
+    let narrow_path = wcs2zstring(file_name);
+    // SAFETY: the pointer is taken from `narrow_path`, which stays alive across the call.
+    unsafe { libc::access(narrow_path.as_ptr(), mode) }
+}
+
+/// Wide character version of unlink(). The raw return value of unlink() is passed back to the
+/// caller.
+pub fn wunlink(file_name: &wstr) -> libc::c_int {
+    let narrow_path = wcs2zstring(file_name);
+    // SAFETY: the pointer is taken from `narrow_path`, which stays alive across the call.
+    unsafe { libc::unlink(narrow_path.as_ptr()) }
+}
+
+/// Wide character version of perror(): narrows `s` and forwards it to `perror`.
+///
+/// The narrowed bytes are not guaranteed to be valid UTF-8. Invalid sequences are replaced with
+/// U+FFFD instead of panicking, so reporting an error can never itself crash.
+pub fn wperror(s: &wstr) {
+    let narrow = wcs2string(s);
+    perror(&String::from_utf8_lossy(&narrow))
+}

 /// Port of the wide-string wperror from `src/wutil.cpp` but for rust `&str`.
-use std::io::Write;
 pub fn perror(s: &str) {
    let e = errno::errno().0;
    let mut stderr = std::io::stderr().lock();
@ -33,6 +82,55 @@ pub fn perror(s: &str) {
    let _ = stderr.write_all(b"\n");
 }

+/// Wide character version of getcwd().
+///
+/// Returns the current working directory. If getcwd() fails, the failure is logged with the
+/// errno value and its description, and an empty string is returned.
+pub fn wgetcwd() -> WString {
+    let mut cwd = [b'\0'; libc::PATH_MAX as usize];
+    // SAFETY: the pointer and the size both describe the local `cwd` buffer.
+    let res = unsafe {
+        libc::getcwd(
+            std::ptr::addr_of_mut!(cwd).cast(),
+            std::mem::size_of_val(&cwd),
+        )
+    };
+    if !res.is_null() {
+        return cstr2wcstring(&cwd);
+    }
+
+    // Capture errno once, immediately after the failing call, so that the number and the
+    // description in the log line always refer to the same error.
+    let err = errno::errno();
+    let description = err.to_string();
+    FLOGF!(
+        error,
+        "getcwd() failed with errno %d/%s",
+        err.0,
+        description.as_str()
+    );
+    WString::new()
+}
+
+/// Wide character version of readlink().
+///
+/// Returns the target of the symlink `file_name`, or `None` if the link cannot be examined or
+/// read. A readlink() failure is reported through `perror`.
+pub fn wreadlink(file_name: &wstr) -> Option<WString> {
+    // Size the buffer from the length lstat reports for the link, plus one spare byte so that
+    // an overlong result can be detected below.
+    let link_len = lwstat(file_name)?.len();
+    let capacity = usize::try_from(link_len).unwrap() + 1;
+    let mut buf = vec![b'\0'; capacity];
+    let narrow_path = wcs2zstring(file_name);
+    // SAFETY: `buf` holds exactly `capacity` writable bytes (at least one) and `narrow_path`
+    // stays alive across the call.
+    let ret = unsafe { libc::readlink(narrow_path.as_ptr(), buf.as_mut_ptr().cast(), capacity) };
+    if ret == -1 {
+        perror("readlink");
+        return None;
+    }
+    // The link might have been modified after our call to lstat. If it now points to a longer
+    // path than before, the buffer is completely full and the result may be truncated. Simply
+    // give up; no error report is needed since our only caller will already fall back to ENOENT.
+    let len = usize::try_from(ret).unwrap();
+    if len == capacity {
+        return None;
+    }
+    Some(str2wcstring(&buf[..len]))
+}
+
 /// Wide character realpath. The last path component does not need to be valid. If an error occurs,
 /// `wrealpath()` returns `None`
 pub fn wrealpath(pathname: &wstr) -> Option<WString> {
@ -141,43 +239,6 @@ pub fn normalize_path(path: &wstr, allow_leading_double_slashes: bool) -> WStrin
    result
 }

-const PUA1_START: char = '\u{E000}';
-const PUA1_END: char = '\u{F900}';
-const PUA2_START: char = '\u{F0000}';
-const PUA2_END: char = '\u{FFFFE}';
-const PUA3_START: char = '\u{100000}';
-const PUA3_END: char = '\u{10FFFE}';
-
-/// Return one if the code point is in a Unicode private use area.
-fn fish_is_pua(c: char) -> bool {
-    PUA1_START <= c && c < PUA1_END
-}
-
-/// We need this because there are too many implementations that don't return the proper answer for
-/// some code points. See issue #3050.
-pub fn fish_iswalnum(c: char) -> bool {
-    !fish_reserved_codepoint(c) && !fish_is_pua(c) && c.is_alphanumeric()
-}
-
-/// Given that \p cursor is a pointer into \p base, return the offset in characters.
-/// This emulates C pointer arithmetic:
-///    `wstr_offset_in(cursor, base)` is equivalent to C++ `cursor - base`.
-pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize {
-    let cursor = cursor.as_slice();
-    let base = base.as_slice();
-    // cursor may be a zero-length slice at the end of base,
-    // which base.as_ptr_range().contains(cursor.as_ptr()) will reject.
-    let base_range = base.as_ptr_range();
-    let curs_range = cursor.as_ptr_range();
-    assert!(
-        base_range.start <= curs_range.start && curs_range.end <= base_range.end,
-        "cursor should be a subslice of base"
-    );
-    let offset = unsafe { cursor.as_ptr().offset_from(base.as_ptr()) };
-    assert!(offset >= 0, "offset should be non-negative");
-    offset as usize
-}
-
 #[test]
 fn test_normalize_path() {
    fn norm_path(path: &wstr) -> WString {
@ -208,6 +269,576 @@ fn test_normalize_path() {
    assert_eq!(norm_path(L!("foo/././bar/.././baz")), "foo/baz");
 }

+/// Performs a "normalizing join" of the working directory \p wd and the path \p path, in a way
+/// appropriate for cd: the result is effectively wd + path with the leading `.` and `..`
+/// components of path resolved against wd. This allows 'cd' out of a directory which may no
+/// longer exist, without allowing 'cd' into a directory that may not exist; see #5341.
+///
+/// Panics if \p wd does not both start and end with `/`.
+#[widestrs]
+pub fn path_normalize_for_cd(wd: &wstr, path: &wstr) -> WString {
+    const SEP: char = '/';
+    let wd_chars = wd.as_char_slice();
+    assert!(
+        wd_chars.first() == Some(&'/') && wd_chars.last() == Some(&'/'),
+        "Invalid working directory, it must start and end with /"
+    );
+
+    // Fast paths: empty, absolute, and paths that do not begin with a dot.
+    match path.as_char_slice().first() {
+        None => return wd.to_owned(),
+        Some(&first) if first == SEP => return path.to_owned(),
+        Some(&first) if first != '.' => return wd.to_owned() + path,
+        Some(_) => {}
+    }
+
+    // Break both inputs into components. Empty components are dropped from the working
+    // directory, which in particular removes the leading and trailing empties.
+    let mut wd_comps = split_string(wd, SEP);
+    let path_comps = split_string(path, SEP);
+    wd_comps.retain(|comp| !comp.is_empty());
+
+    // Consume leading "", "." and ".." components of the path. Each ".." pops one component off
+    // the working directory; once the working directory is exhausted, ".." is kept as-is.
+    let mut consumed = 0;
+    for comp in &path_comps {
+        let is_noop = comp.is_empty() || comp == "."L;
+        if !is_noop {
+            if comp == ".."L && !wd_comps.is_empty() {
+                wd_comps.pop();
+            } else {
+                break;
+            }
+        }
+        consumed += 1;
+    }
+
+    // Whatever was not consumed is appended; the joined result gets its leading / back.
+    wd_comps.extend(path_comps.into_iter().skip(consumed));
+    let mut joined = join_strings(&wd_comps, SEP);
+    joined.insert(0, '/');
+    joined
+}
+
+/// Wide character version of dirname().
+///
+/// Returns the directory portion of `path`: `"."` when the path contains no slash, `"/"` when
+/// it consists only of slashes or names an entry directly under the root, and `"//"` unchanged.
+#[widestrs]
+pub fn wdirname(mut path: WString) -> WString {
+    // Do not use system-provided dirname (#7837).
+    // On Mac it's not thread safe, and will error for paths exceeding PATH_MAX.
+    // This follows OpenGroup dirname recipe.
+
+    // 1: Double-slash stays.
+    if path == "//"L {
+        return path;
+    }
+
+    // 2: All slashes => return slash.
+    if !path.is_empty() && path.chars().all(|c| c == '/') {
+        return "/"L.to_owned();
+    }
+
+    // 3: Trim trailing slashes.
+    while path.as_char_slice().last() == Some(&'/') {
+        path.pop();
+    }
+
+    // 4: No slashes left => return period.
+    // rposition yields the index of the last slash counted from the front of the string, which
+    // is what truncate() below expects.
+    let Some(last_slash) = path.as_char_slice().iter().rposition(|&c| c == '/') else {
+        return "."L.to_owned();
+    };
+
+    // 5: Remove trailing non-slashes.
+    path.truncate(last_slash + 1);
+
+    // 6: Skip as permitted.
+    // 7: Remove trailing slashes again.
+    while path.as_char_slice().last() == Some(&'/') {
+        path.pop();
+    }
+
+    // 8: Empty => return slash.
+    if path.is_empty() {
+        path = "/"L.to_owned();
+    }
+    path
+}
+
+/// Wide character version of basename().
+///
+/// Returns the final component of `path` with trailing slashes removed: `"."` for an empty
+/// path and `"/"` for a path consisting only of slashes.
+#[widestrs]
+pub fn wbasename(mut path: WString) -> WString {
+    // This follows OpenGroup basename recipe.
+    // 1: empty => allowed to return ".". This is what system impls do.
+    if path.is_empty() {
+        return "."L.to_owned();
+    }
+
+    // 2: Skip as permitted.
+    // 3: All slashes => return slash.
+    if path.chars().all(|c| c == '/') {
+        return "/"L.to_owned();
+    }
+
+    // 4: Remove trailing slashes.
+    while path.as_char_slice().last() == Some(&'/') {
+        path.pop();
+    }
+
+    // 5: Remove up to and including last slash, keeping only what follows it.
+    if let Some(last_slash) = path.as_char_slice().iter().rposition(|&c| c == '/') {
+        path = WString::from_chars(&path.as_char_slice()[last_slash + 1..]);
+    }
+    path
+}
+
+/// Wide character version of mkdir(). `mode` is handed to mkdir() unchanged and the raw return
+/// value of mkdir() is passed back to the caller.
+pub fn wmkdir(name: &wstr, mode: libc::mode_t) -> libc::c_int {
+    let narrow_path = wcs2zstring(name);
+    // SAFETY: the pointer is taken from `narrow_path`, which stays alive across the call.
+    unsafe { libc::mkdir(narrow_path.as_ptr(), mode) }
+}
+
+/// Wide character version of rename(). The raw return value of rename() is passed back to the
+/// caller.
+pub fn wrename(old_name: &wstr, new_name: &wstr) -> libc::c_int {
+    let (from, to) = (wcs2zstring(old_name), wcs2zstring(new_name));
+    // SAFETY: both pointers are taken from locals that stay alive across the call.
+    unsafe { libc::rename(from.as_ptr(), to.as_ptr()) }
+}
+
+/// Issues a single write of `input` to `fd` and returns its result. The descriptor is only
+/// borrowed: it is still open when this function returns.
+fn write_to_fd(input: &[u8], fd: RawFd) -> std::io::Result<usize> {
+    // Wrap the descriptor in a File only to get access to its Write implementation.
+    let mut borrowed = unsafe { std::fs::File::from_raw_fd(fd) };
+    let outcome = borrowed.write(input);
+    // Hand the descriptor back so that dropping the File does not close it.
+    let _ = borrowed.into_raw_fd();
+    outcome
+}
+
+/// Write a wide string to a file descriptor. The narrowed bytes are batched in a fixed-size
+/// stack buffer, so no additional allocation is done here.
+/// This does NOT retry on EINTR or EAGAIN, it simply returns.
+/// \return the total number of bytes written, or `None` on error. In the error case the number
+/// of bytes actually written cannot be obtained.
+pub fn wwrite_to_fd(input: &wstr, fd: RawFd) -> Option<usize> {
+    // Accumulate data in a local buffer.
+    let mut accum = [b'\0'; 512];
+    let mut accumlen = 0;
+    let maxaccum: usize = accum.len();
+
+    let mut total_written = 0;
+
+    // Helper to perform a write to 'fd', looping as necessary.
+    // \return true on success, false on error.
+    fn do_write(fd: RawFd, total_written: &mut usize, mut buf: &[u8]) -> bool {
+        while !buf.is_empty() {
+            let Ok(amt) = write_to_fd(buf, fd) else {
+                return false;
+            };
+            *total_written += amt;
+            assert!(amt <= buf.len(), "Wrote more than requested");
+            buf = &buf[amt..];
+        }
+        true
+    }
+
+    // Helper to flush the accumulation buffer.
+    let flush_accum = |total_written: &mut usize, accum: &[u8], accumlen: &mut usize| {
+        if !do_write(fd, total_written, &accum[..*accumlen]) {
+            return false;
+        }
+        *accumlen = 0;
+        true
+    };
+
+    let mut success = wcs2string_callback(input, |buff: &[u8]| {
+        if buff.len() + accumlen > maxaccum {
+            // We have to flush.
+            if !flush_accum(&mut total_written, &accum, &mut accumlen) {
+                return false;
+            }
+        }
+        if buff.len() + accumlen <= maxaccum {
+            // Accumulate more. The length must advance along with the copy, otherwise later
+            // chunks overwrite earlier ones and the final flush writes nothing.
+            accum[accumlen..accumlen + buff.len()].copy_from_slice(buff);
+            accumlen += buff.len();
+            true
+        } else {
+            // Too much data to even fit, just write it immediately.
+            do_write(fd, &mut total_written, buff)
+        }
+    });
+    // Flush any remaining.
+    if success {
+        success = flush_accum(&mut total_written, &accum, &mut accumlen);
+    }
+    success.then_some(total_written)
+}
+
+const PUA1_START: char = '\u{E000}'; // start of the first private-use range
+const PUA1_END: char = '\u{F900}'; // end bound of the first private-use range
+const PUA2_START: char = '\u{F0000}'; // start of the second private-use range
+const PUA2_END: char = '\u{FFFFE}'; // end bound of the second private-use range
+const PUA3_START: char = '\u{100000}'; // start of the third private-use range
+const PUA3_END: char = '\u{10FFFE}'; // end bound of the third private-use range
+
+/// Return true if the code point is in a Unicode private use area, i.e. inside any of the three
+/// half-open ranges PUA1, PUA2 or PUA3 (start included, end excluded).
+fn fish_is_pua(c: char) -> bool {
+    (PUA1_START..PUA1_END).contains(&c)
+        || (PUA2_START..PUA2_END).contains(&c)
+        || (PUA3_START..PUA3_END).contains(&c)
+}
+
+/// Alphanumeric test that never accepts reserved or private-use code points.
+/// We need this because there are too many implementations that don't return the proper answer
+/// for some code points. See issue #3050.
+pub fn fish_iswalnum(c: char) -> bool {
+    if fish_reserved_codepoint(c) || fish_is_pua(c) {
+        return false;
+    }
+    c.is_alphanumeric()
+}
+
+extern "C" {
+    fn iswgraph(wc: libc::wchar_t) -> libc::c_int; // C's iswgraph(); the parameter is technically wint_t
+}
+
+/// Graphic-character test: reserved code points are never graphic, private-use code points
+/// always are, and everything else is decided by the C library's iswgraph().
+/// We need this because there are too many implementations that don't return the proper answer
+/// for some code points. See issue #3050.
+pub fn fish_iswgraph(c: char) -> bool {
+    if fish_reserved_codepoint(c) {
+        return false;
+    }
+    if fish_is_pua(c) {
+        return true;
+    }
+    let graphic = unsafe { iswgraph(c as libc::wchar_t) };
+    graphic != 0
+}
+
+/// Returns the width of `s` as computed by `fallback::fish_wcswidth`.
+pub fn fish_wcswidth(s: &wstr) -> libc::c_int {
+    let width = fallback::fish_wcswidth(s);
+    width
+}
+
+/// Identity of a file, built from its stat data. We use this to detect and avoid symlink loops,
+/// among other things. While an inode / dev pair is sufficient to distinguish co-existing files,
+/// Linux seems to aggressively re-use inodes, so it cannot determine if a file has been deleted
+/// (ABA problem). Therefore we include richer information (size, change time and mod time).
+#[derive(Clone, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileId {
+    device: libc::dev_t, // st_dev
+    inode: libc::ino_t, // st_ino
+    size: u64, // st_size
+    change_seconds: libc::time_t, // st_ctime
+    change_nanoseconds: i64, // st_ctime_nsec
+    mod_seconds: libc::time_t, // st_mtime
+    mod_nanoseconds: i64, // st_mtime_nsec
+}
+
+impl FileId {
+    /// Creates an id in which every field holds a sentinel value rather than real stat data.
+    pub const fn new() -> Self {
+        Self {
+            device: -1 as _,
+            inode: -1 as _,
+            size: -1 as _,
+            change_seconds: libc::time_t::MIN,
+            change_nanoseconds: i64::MIN,
+            mod_seconds: libc::time_t::MIN,
+            mod_nanoseconds: -1 as _,
+        }
+    }
+
+    /// Builds an id from the device, inode, size and change/modification times of a stat buffer.
+    #[allow(clippy::unnecessary_cast)] // the nanosecond field types are platform-dependent
+    pub fn from_stat(buf: &libc::stat) -> FileId {
+        FileId {
+            device: buf.st_dev,
+            inode: buf.st_ino,
+            size: buf.st_size as u64,
+            change_seconds: buf.st_ctime,
+            change_nanoseconds: buf.st_ctime_nsec as _,
+            mod_seconds: buf.st_mtime,
+            mod_nanoseconds: buf.st_mtime_nsec as _,
+        }
+    }
+
+    /// \return true if \param rhs has higher mtime seconds than this file id.
+    /// If identical, nanoseconds are compared.
+    pub fn older_than(&self, rhs: &FileId) -> bool {
+        (self.mod_seconds, self.mod_nanoseconds) < (rhs.mod_seconds, rhs.mod_nanoseconds)
+    }
+
+    /// Renders all fields as text, one labelled field per line, for debugging output.
+    pub fn dump(&self) -> WString {
+        let mut out = WString::new();
+        out += &sprintf!("     device: %lld\n", self.device)[..];
+        out += &sprintf!("      inode: %lld\n", self.inode)[..];
+        out += &sprintf!("       size: %lld\n", self.size)[..];
+        out += &sprintf!("     change: %lld\n", self.change_seconds)[..];
+        out += &sprintf!("change_nano: %lld\n", self.change_nanoseconds)[..];
+        out += &sprintf!("        mod: %lld\n", self.mod_seconds)[..];
+        out += &sprintf!("   mod_nano: %lld", self.mod_nanoseconds)[..];
+        out
+    }
+}
+
+pub const INVALID_FILE_ID: FileId = FileId::new(); // sentinel: the all-placeholder id from new()
+
+/// Returns the file id of the open descriptor `fd`, or `INVALID_FILE_ID` if `fd` is negative
+/// or fstat() fails.
+pub fn file_id_for_fd(fd: RawFd) -> FileId {
+    if fd < 0 {
+        return INVALID_FILE_ID;
+    }
+    let mut buf: libc::stat = unsafe { std::mem::zeroed() };
+    if unsafe { libc::fstat(fd, &mut buf) } != 0 {
+        return INVALID_FILE_ID;
+    }
+    FileId::from_stat(&buf)
+}
+
+/// Convenience wrapper around `file_id_for_fd` for a descriptor held by an `AutoCloseFd`.
+pub fn file_id_for_autoclose_fd(fd: &AutoCloseFd) -> FileId {
+    let raw_fd = fd.fd();
+    file_id_for_fd(raw_fd)
+}
+
+/// Returns the file id of the file at `path` as reported by stat(), or `INVALID_FILE_ID` if
+/// stat() fails.
+pub fn file_id_for_path(path: &wstr) -> FileId {
+    let narrow_path = wcs2zstring(path);
+    let mut buf: libc::stat = unsafe { std::mem::zeroed() };
+    if unsafe { libc::stat(narrow_path.as_ptr(), &mut buf) } != 0 {
+        return INVALID_FILE_ID;
+    }
+    FileId::from_stat(&buf)
+}
+
+/// Types of files that may be in a directory.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub enum DirEntryType {
+    fifo = 1, // FIFO file; discriminants start at 1
+    chr,      // character device
+    dir,      // directory
+    blk,      // block device
+    reg,      // regular file
+    lnk,      // symlink
+    sock,     // socket
+    whiteout, // whiteout (from BSD); the conversion helpers in this file do not produce it yet
+}
+
+/// An entry yielded by `DirIter`.
+#[derive(Default)]
+pub struct DirEntry {
+    /// File name of this entry.
+    pub name: WString,
+
+    /// inode of this entry.
+    pub inode: libc::ino_t,
+
+    // Stat buffer for this entry, or `None` if not yet computed (or if the stat failed).
+    stat: Option<libc::stat>,
+
+    // The type of the entry. This is initially `None`; it may be populated eagerly via readdir()
+    // on some filesystems, or later via stat(). If stat() fails, the error is silently ignored
+    // and the type is left as `None`. Note this is an unavoidable race.
+    typ: Option<DirEntryType>,
+
+    // fd of the DIR*, used for fstatat(). A negative value makes `do_stat()` a no-op.
+    dirfd: RawFd,
+}
+
+impl DirEntry {
+    /// \return the type of this entry if it is already known, otherwise `None`. Never stats.
+    pub fn fast_type(&self) -> Option<DirEntryType> {
+        self.typ
+    }
+
+    /// \return the type of this entry, falling back to stat() if necessary.
+    /// If stat() fails because the file has disappeared, this will return `None`.
+    /// If stat() fails because of a broken symlink, this will return type lnk.
+    pub fn check_type(&mut self) -> Option<DirEntryType> {
+        if let Some(known) = self.typ {
+            return Some(known);
+        }
+        // Not known yet: stat to populate the type, swallowing errors.
+        self.do_stat();
+        self.typ
+    }
+
+    /// \return whether this is a directory. This may call stat().
+    pub fn is_dir(&mut self) -> bool {
+        matches!(self.check_type(), Some(DirEntryType::dir))
+    }
+
+    /// \return the stat buffer for this entry, invoking stat() if necessary.
+    pub fn stat(&mut self) -> Option<libc::stat> {
+        if let Some(cached) = self.stat {
+            return Some(cached);
+        }
+        self.do_stat();
+        self.stat
+    }
+
+    // Clear the per-entry fields. The directory fd is deliberately left alone.
+    fn reset(&mut self) {
+        self.stat = None;
+        self.typ = None;
+        self.inode = 0;
+        self.name.clear();
+    }
+
+    // Populate our stat buffer and our type. Errors are silently ignored.
+    fn do_stat(&mut self) {
+        // Without a directory fd there is nothing to stat relative to.
+        if self.dirfd < 0 {
+            return;
+        }
+        let narrow_name = wcs2zstring(&self.name);
+        let mut statbuf: libc::stat = unsafe { std::mem::zeroed() };
+        // Flags of 0 mean that symlinks are followed.
+        let rc = unsafe { libc::fstatat(self.dirfd, narrow_name.as_ptr(), &mut statbuf, 0) };
+        if rc == 0 {
+            self.stat = Some(statbuf);
+            self.typ = stat_mode_to_entry_type(statbuf.st_mode);
+            return;
+        }
+
+        // The stat failed, so the stat buffer stays unpopulated; only the type is decided here.
+        self.typ = match errno::errno().0 {
+            // Following symlinks failed with a bad symlink: report the entry as a link.
+            ELOOP => Some(DirEntryType::lnk),
+            // These are "expected" errors.
+            EACCES | EIO | ENOENT | ENOTDIR | ENAMETOOLONG | ENODEV => None,
+            // This used to print an error, but given that we have seen
+            // both ENODEV (above) and ENOTCONN,
+            // and that the error isn't actionable and shows up while typing,
+            // let's not do that.
+            _ => None,
+        };
+    }
+}
+
+/// Maps a readdir() `d_type` value to an entry type; any value not listed yields `None`.
+fn dirent_type_to_entry_type(dt: u8) -> Option<DirEntryType> {
+    let typ = match dt {
+        DT_FIFO => DirEntryType::fifo,
+        DT_CHR => DirEntryType::chr,
+        DT_DIR => DirEntryType::dir,
+        DT_BLK => DirEntryType::blk,
+        DT_REG => DirEntryType::reg,
+        DT_LNK => DirEntryType::lnk,
+        DT_SOCK => DirEntryType::sock,
+        // todo! whiteout
+        _ => return None,
+    };
+    Some(typ)
+}
+
+/// Maps the file-type bits (`S_IFMT`) of a stat mode to an entry type; any other value yields
+/// `None`.
+fn stat_mode_to_entry_type(m: libc::mode_t) -> Option<DirEntryType> {
+    let file_type_bits = m & S_IFMT;
+    let typ = match file_type_bits {
+        S_IFIFO => DirEntryType::fifo,
+        S_IFCHR => DirEntryType::chr,
+        S_IFDIR => DirEntryType::dir,
+        S_IFBLK => DirEntryType::blk,
+        S_IFREG => DirEntryType::reg,
+        S_IFLNK => DirEntryType::lnk,
+        S_IFSOCK => DirEntryType::sock,
+        // todo! whiteout
+        _ => return None,
+    };
+    Some(typ)
+}
+
+/// Iterator over the entries of a directory, wrapping readdir().
+/// This allows enumerating the contents of a directory, exposing the file type if the filesystem
+/// itself exposes that from readdir(). stat() is incurred only if necessary: if the entry is a
+/// symlink, or if the caller asks for the stat buffer.
+/// Symlinks are followed.
+pub struct DirIter {
+    /// Whether this iterator yields the "." and ".." filesystem entries.
+    withdot: bool,
+
+    dir: *mut libc::DIR, // handle from wopendir(); null if the open failed
+    error: libc::c_int, // errno recorded when opendir() or readdir() returned null
+    entry: DirEntry, // storage reused for each entry produced by next()
+}
+
+impl DirIter {
+    /// Open a directory at a given path. On failure the errno from opendir() is recorded, the
+    /// iterator yields no entries, and its entry carries an invalid directory fd (-1).
+    /// Note opendir is guaranteed to set close-on-exec by POSIX (hooray).
+    pub fn new(path: &wstr, withdot: bool) -> Self {
+        let dir = wopendir(path);
+        // dirfd() must not be handed a null DIR*, so only query it after a successful open.
+        let (error, dirfd) = if dir.is_null() {
+            (errno::errno().0, -1)
+        } else {
+            // SAFETY: `dir` was just returned non-null by opendir().
+            (0, unsafe { libc::dirfd(dir) })
+        };
+        DirIter {
+            withdot,
+            dir,
+            error,
+            entry: DirEntry {
+                dirfd,
+                ..Default::default()
+            },
+        }
+    }
+
+    /// Rewind the directory to the beginning. Does nothing if the directory failed to open.
+    pub fn rewind(&mut self) {
+        if !self.dir.is_null() {
+            // SAFETY: `self.dir` is a live DIR* obtained from opendir().
+            unsafe { libc::rewinddir(self.dir) };
+        }
+    }
+
+    /// Read the next entry, skipping "." and ".." unless `withdot` was requested.
+    /// \return `None` at the end of the directory or on error; the errno observed at that point
+    /// is recorded (it is cleared before each read, so it stays 0 at a clean end of directory).
+    pub fn next(&mut self) -> Option<&DirEntry> {
+        if self.dir.is_null() {
+            return None;
+        }
+        let dent = loop {
+            errno::set_errno(errno::Errno(0));
+            // SAFETY: `self.dir` is a live DIR* obtained from opendir().
+            let dent = unsafe { libc::readdir(self.dir) };
+            if dent.is_null() {
+                self.error = errno::errno().0;
+                return None;
+            }
+            // SAFETY: readdir() returned a non-null pointer to a dirent.
+            let dent = unsafe { &*dent };
+
+            // Detect "." and ".." by looking only at bytes up to and including the terminating
+            // NUL; whatever follows the NUL in d_name is unspecified. c_char is used because
+            // its signedness differs between platforms.
+            let dot = b'.' as libc::c_char;
+            let name = &dent.d_name;
+            let is_dot_entry =
+                name[0] == dot && (name[1] == 0 || (name[1] == dot && name[2] == 0));
+            if self.withdot || !is_dot_entry {
+                break dent;
+            }
+        };
+
+        self.entry.reset();
+        let d_name: Vec<u8> = dent.d_name.iter().map(|b| *b as u8).collect();
+        self.entry.name = cstr2wcstring(&d_name);
+        #[cfg(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))]
+        {
+            self.entry.inode = dent.d_fileno;
+        }
+        #[cfg(not(any(target_os = "freebsd", target_os = "netbsd", target_os = "openbsd")))]
+        {
+            self.entry.inode = dent.d_ino;
+        }
+        let typ = dirent_type_to_entry_type(dent.d_type);
+        // Do not store symlinks as we will need to resolve them.
+        if typ != Some(DirEntryType::lnk) {
+            self.entry.typ = typ;
+        }
+
+        Some(&self.entry)
+    }
+}
+
+impl Drop for DirIter {
+    /// Close the directory stream so that the DIR* and its descriptor are not leaked.
+    fn drop(&mut self) {
+        if !self.dir.is_null() {
+            // SAFETY: `self.dir` came from opendir() and is closed exactly once, here.
+            unsafe { libc::closedir(self.dir) };
+        }
+    }
+}
+
+/// Given that \p cursor is a subslice of \p base, return the offset of cursor's start within
+/// base, in characters. This emulates C pointer arithmetic:
+///    `wstr_offset_in(cursor, base)` is equivalent to C++ `cursor - base`.
+///
+/// Panics if \p cursor does not lie within \p base.
+pub fn wstr_offset_in(cursor: &wstr, base: &wstr) -> usize {
+    let cursor_chars = cursor.as_slice();
+    let base_chars = base.as_slice();
+    // Compare whole pointer ranges rather than asking whether base's range contains cursor's
+    // start: cursor may be a zero-length slice at the very end of base, which such a
+    // containment test would reject.
+    let base_span = base_chars.as_ptr_range();
+    let cursor_span = cursor_chars.as_ptr_range();
+    let is_subslice = base_span.start <= cursor_span.start && cursor_span.end <= base_span.end;
+    assert!(is_subslice, "cursor should be a subslice of base");
+    let delta = unsafe { cursor_chars.as_ptr().offset_from(base_chars.as_ptr()) };
+    assert!(delta >= 0, "offset should be non-negative");
+    delta as usize
+}
+
 #[test]
 fn test_wstr_offset_in() {
    use crate::wchar::L;