// blob: 6a18fe7468ebe7a20bd462f5760ff77f9206ac6f
use bstr::{BStr, ByteSlice};
/// Types used by the `component()` validation function: its error, its options and the mode of a leaf component.
#[allow(clippy::empty_docs)]
pub mod component {
/// The error returned by [`component()`](super::component()).
///
/// Each variant names the check that rejected the path component.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// The component has no bytes at all.
#[error("A path component must not be empty")]
Empty,
/// The component contains `/`, or it contains `\` while `protect_windows` is enabled.
#[error("Path separators like / or \\ are not allowed")]
PathSeparator,
/// With `protect_windows` enabled, the second character of the component is `:`, as in `c:`.
#[error("Windows path prefixes are not allowed")]
WindowsPathPrefix,
/// The component names a Windows device like `CON` or `LPT1`.
/// Only checked if both `protect_ntfs` and `protect_windows` are enabled.
#[error("Windows device-names may have side-effects and are not allowed")]
WindowsReservedName,
/// The component contains a byte below `0x20` or one of `:<>"|?*`, or it ends with `.` or a space.
/// Only checked if both `protect_ntfs` and `protect_windows` are enabled.
#[error("Trailing spaces or dots, and the following characters anywhere, are forbidden in Windows paths, along with non-printable ones: <>:\"|?*")]
WindowsIllegalCharacter,
/// The component is `.git` (compared case-insensitively), or a variation of it that is detected
/// by the enabled HFS+ or NTFS protections.
#[error("The .git name may never be used")]
DotGitDir,
/// The component is a symlink named `.gitmodules` (compared case-insensitively), or a variation of that
/// name which is detected by the enabled HFS+ or NTFS protections.
#[error("The .gitmodules file must not be a symlink")]
SymlinkedGitModules,
}
/// Further specify what to check for in [`component()`](super::component())
///
/// Note that the `Default` implementation maximizes safety by enabling all protections.
#[derive(Debug, Copy, Clone)]
pub struct Options {
/// This flag should be turned on when on Windows, but can be turned on when on other platforms
/// as well to prevent path components that can cause trouble on Windows.
pub protect_windows: bool,
/// If `true`, protections for the MacOS HFS+ filesystem will be active, checking for
/// special directories that we should never write while ignoring codepoints just like HFS+ would.
///
/// This field is equivalent to `core.protectHFS`.
pub protect_hfs: bool,
/// If `true`, protections for Windows NTFS specific features will be active. This adds special handling
/// for `8.3` filenames and alternate data streams, both of which could be used to mask the true name of
/// what would be created on disk.
///
/// This field is equivalent to `core.protectNTFS`.
pub protect_ntfs: bool,
}
impl Default for Options {
/// Create options with every protection switched on.
fn default() -> Self {
Options {
protect_windows: true,
protect_hfs: true,
protect_ntfs: true,
}
}
}
/// The mode of the component, if it's the leaf of a path.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Mode {
/// The item is a symbolic link.
Symlink,
}
}
/// Validate `input` as the name of a tree or blob entry, i.e. as a single path component, and return it
/// unchanged if it passes all checks.
///
/// `mode` describes the kind of entry `input` refers to and should be `Some` if `input` is the last component
/// of the path it was taken from. The destructured [`component::Options`] select which protections are applied.
///
/// `input` must not make it possible to exit the repository, or to specify absolute paths.
///
/// # Errors
///
/// The first failing check determines the returned [`component::Error`]. Checks run in this order:
/// emptiness, path separators, Windows path prefixes, HFS+ lookalikes of `.git` and `.gitmodules`,
/// NTFS lookalikes of the same, and Windows device names and illegal characters (only if both
/// `protect_ntfs` and `protect_windows` are set). If neither `protect_hfs` nor `protect_ntfs` is set,
/// `.git` and `.gitmodules` are still rejected by a plain ASCII case-insensitive comparison.
pub fn component(
    input: &BStr,
    mode: Option<component::Mode>,
    component::Options {
        protect_windows,
        protect_hfs,
        protect_ntfs,
    }: component::Options,
) -> Result<&BStr, component::Error> {
    if input.is_empty() {
        return Err(component::Error::Empty);
    }

    // A backslash only counts as a separator when Windows is to be protected.
    let has_separator = if protect_windows {
        input.find_byteset(b"/\\").is_some()
    } else {
        input.find_byte(b'/').is_some()
    };
    if has_separator {
        return Err(component::Error::PathSeparator);
    }
    if protect_windows && input.chars().nth(1) == Some(':') {
        return Err(component::Error::WindowsPathPrefix);
    }

    // `.gitmodules` is only a problem if the entry is a symlink.
    let leaf_is_symlink = is_symlink(mode);

    if protect_hfs {
        if is_dot_hfs(input, "git") {
            return Err(component::Error::DotGitDir);
        }
        if leaf_is_symlink && is_dot_hfs(input, "gitmodules") {
            return Err(component::Error::SymlinkedGitModules);
        }
    }

    if protect_ntfs {
        if is_dot_git_ntfs(input) {
            return Err(component::Error::DotGitDir);
        }
        if leaf_is_symlink && is_dot_ntfs(input, "gitmodules", "gi7eba") {
            return Err(component::Error::SymlinkedGitModules);
        }
    }

    // Device names and illegal characters are only looked at if both protections are requested.
    if protect_ntfs && protect_windows {
        if let Some(err) = check_win_devices_and_illegal_characters(input) {
            return Err(err);
        }
    }

    if !protect_hfs && !protect_ntfs {
        // Without filesystem-specific protections, fall back to a plain case-insensitive comparison.
        if input.eq_ignore_ascii_case(b".git") {
            return Err(component::Error::DotGitDir);
        }
        if leaf_is_symlink && input.eq_ignore_ascii_case(b".gitmodules") {
            return Err(component::Error::SymlinkedGitModules);
        }
    }

    Ok(input)
}
/// Return `true` if the path component at `input` looks like a Windows device, like `CON`
/// or `LPT1` (case-insensitively).
///
/// The names `AUX`, `NUL`, `PRN`, `CON`, `CONIN$`, `CONOUT$`, `COM1` to `COM9` and `LPT0` to `LPT9` are
/// recognized. A name also matches if it is followed by any number of spaces and then either the end of
/// `input`, a `.` or a `:`, so `CON.txt` counts as device while `CONx` does not.
///
/// This is relevant only on Windows, where one may be tricked into reading or writing to such devices.
/// When reading from `CON`, a console-program may block until the user provided input.
pub fn component_is_windows_device(input: &BStr) -> bool {
// All matching is done by the private helper; this function merely is its public name.
is_win_device(input)
}
/// Return `true` if `input` starts with a reserved Windows device name (compared ASCII case-insensitively)
/// and whatever follows the name does not change that, as decided by `is_done_windows()`.
///
/// Inputs shorter than three bytes can never be a device name.
fn is_win_device(input: &BStr) -> bool {
    let Some(prefix) = input.get(..3) else {
        return false;
    };
    // Whether the bytes starting at `from` leave a device name match intact.
    let rest_is_done = |from: usize| is_done_windows(input.get(from..));

    // Devices that consist of exactly three letters.
    let plain_devices: [&[u8]; 3] = [b"AUX", b"NUL", b"PRN"];
    if plain_devices.iter().any(|name| prefix.eq_ignore_ascii_case(name)) {
        return rest_is_done(3);
    }

    // Note that the following allows `COM0`, even though `LPT0` is not allowed.
    // Even though tests seem to indicate that neither `LPT0` nor `COM0` are valid
    // device names, it's unclear this truly is the case in all possible versions and editions
    // of Windows.
    // Hence, justification for this asymmetry is merely to do exactly the same as Git does,
    // and to have exactly the same behaviour during validation (for worktree-writes).
    let number = input.get(3).copied();
    if prefix.eq_ignore_ascii_case(b"COM") {
        return matches!(number, Some(b'1'..=b'9')) && rest_is_done(4);
    }
    if prefix.eq_ignore_ascii_case(b"LPT") {
        return matches!(number, Some(b'0'..=b'9')) && rest_is_done(4);
    }

    if prefix.eq_ignore_ascii_case(b"CON") {
        // `CON` itself, or its `CONIN$` and `CONOUT$` variants.
        let has_suffix = |suffix: &[u8]| {
            let end = 3 + suffix.len();
            input
                .get(3..end)
                .map_or(false, |candidate| candidate.eq_ignore_ascii_case(suffix))
                && rest_is_done(end)
        };
        return rest_is_done(3) || has_suffix(b"IN$") || has_suffix(b"OUT$");
    }

    false
}
fn check_win_devices_and_illegal_characters(input: &BStr) -> Option<component::Error> {
if is_win_device(input) {
return Some(component::Error::WindowsReservedName);
}
if input.iter().any(|b| *b < 0x20 || b":<>\"|?*".contains(b)) {
return Some(component::Error::WindowsIllegalCharacter);
}
if input.ends_with(b".") || input.ends_with(b" ") {
return Some(component::Error::WindowsIllegalCharacter);
}
None
}
/// Return `true` only if `mode` is present and denotes a symbolic link.
fn is_symlink(mode: Option<component::Mode>) -> bool {
    matches!(mode, Some(component::Mode::Symlink))
}
/// Return `true` if `input` equals `.` followed by `search_case_insensitive` once all code points
/// that HFS+ ignores are removed, comparing ASCII case-insensitively.
///
/// `search_case_insensitive` is given without the leading dot, e.g. `git` to detect `.git`.
fn is_dot_hfs(input: &BStr, search_case_insensitive: &str) -> bool {
    /// Case-insensitive HFS+ skips these code points as "ignorable" when comparing filenames. See:
    /// https://github.com/git/git/commit/6162a1d323d24fd8cbbb1a6145a91fb849b2568f
    /// https://developer.apple.com/library/archive/technotes/tn/tn1150.html#StringComparisonAlgorithm
    /// https://github.com/apple-oss-distributions/hfs/blob/main/core/UCStringCompareData.h
    fn is_ignorable(c: char) -> bool {
        matches!(
            c as u32,
            // ZERO WIDTH NON-JOINER, ZERO WIDTH JOINER, LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
            0x200c..=0x200f
            // LEFT-TO-RIGHT EMBEDDING, RIGHT-TO-LEFT EMBEDDING, POP DIRECTIONAL FORMATTING,
            // LEFT-TO-RIGHT OVERRIDE, RIGHT-TO-LEFT OVERRIDE
            | 0x202a..=0x202e
            // INHIBIT/ACTIVATE SYMMETRIC SWAPPING, INHIBIT/ACTIVATE ARABIC FORM SHAPING,
            // NATIONAL DIGIT SHAPES, NOMINAL DIGIT SHAPES
            | 0x206a..=0x206f
            // ZERO WIDTH NO-BREAK SPACE
            | 0xfeff
        )
    }

    let mut significant = input.chars().filter(|c| !is_ignorable(*c));
    if significant.next() != Some('.') {
        return false;
    }

    // Every expected character needs a matching counterpart in the input...
    for expected in search_case_insensitive.chars() {
        match significant.next() {
            Some(actual) if expected.eq_ignore_ascii_case(&actual) => {}
            _ => return false,
        }
    }
    // ...and nothing significant may follow after the name.
    significant.next().is_none()
}
/// Return `true` if `input` starts with `.git` or with `git~1` (compared ASCII case-insensitively)
/// and the bytes after that name leave the match intact according to `is_done_ntfs()`.
fn is_dot_git_ntfs(input: &BStr) -> bool {
    // Tried in order; the first name that matches the start of `input` decides the outcome.
    let names: [&[u8]; 2] = [b".git", b"git~1"];
    for name in names {
        let starts_with_name = input
            .get(..name.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(name));
        if starts_with_name {
            return is_done_ntfs(input.get(name.len()..));
        }
    }
    false
}
/// Return `true` if `input` is a name that matches `.` followed by `search_case_insensitive`, either directly
/// or by way of one of the shortened forms checked for below.
///
/// The `search_case_insensitive` name is the actual name to look for (in a case-insensitive way).
/// Opposed to that there is the special `ntfs_shortname_prefix` which is derived from `search_case_insensitive`
/// but looks more like a hash, one that NTFS uses to disambiguate things, for when there is a lot of files
/// with the same prefix. `component()` passes `gi7eba` for `gitmodules`, for example.
///
/// In all cases, the bytes following a match must leave it intact as decided by `is_done_ntfs()`.
fn is_dot_ntfs(input: &BStr, search_case_insensitive: &str, ntfs_shortname_prefix: &str) -> bool {
if input.first() == Some(&b'.') {
// Long form: a dot, then the searched name in any ASCII case, then a harmless remainder.
let end_pos = 1 + search_case_insensitive.len();
if input.get(1..end_pos).map_or(false, |input| {
input.eq_ignore_ascii_case(search_case_insensitive.as_bytes())
}) {
is_done_ntfs(input.get(end_pos..))
} else {
false
}
} else {
// Short form 1: the first six bytes of the searched name, then `~` and a digit from 1 to 4.
// This is skipped if the searched name or the input has fewer than six bytes.
let search_case_insensitive: &[u8] = search_case_insensitive.as_bytes();
if search_case_insensitive
.get(..6)
.zip(input.get(..6))
.map_or(false, |(ntfs_prefix, first_6_of_input)| {
first_6_of_input.eq_ignore_ascii_case(ntfs_prefix)
&& input.get(6) == Some(&b'~')
// It's notable that only `~1` to `~4` are possible before the disambiguation algorithm
// switches to using the `ntfs_shortname_prefix`, which is checked hereafter.
&& input.get(7).map_or(false, |num| (b'1'..=b'4').contains(num))
})
{
return is_done_ntfs(input.get(8..));
}
// Short form 2: bytes of `ntfs_shortname_prefix`, then `~`, a digit from 1 to 9 and only digits after that,
// with at least 8 bytes being inspected. Running out of input before that means there is no match.
let ntfs_shortname_prefix: &[u8] = ntfs_shortname_prefix.as_bytes();
let mut saw_tilde = false;
let mut pos = 0;
while pos < 8 {
let Some(b) = input.get(pos).copied() else {
return false;
};
if saw_tilde {
// After the tilde and its first digit, only ASCII digits are acceptable.
if !b.is_ascii_digit() {
return false;
}
} else if b == b'~' {
saw_tilde = true;
// Consume the tilde here and validate the byte right after it, which must be 1 to 9.
// Together with the increment at the end of the loop this advances by two positions.
pos += 1;
let Some(b) = input.get(pos).copied() else {
return false;
};
if !(b'1'..=b'9').contains(&b) {
return false;
}
} else if pos >= 6
|| b & 0x80 == 0x80
|| ntfs_shortname_prefix
.get(pos)
.map_or(true, |ob| !b.eq_ignore_ascii_case(ob))
{
// Before the tilde, there may be no more than six bytes, none of them with the high bit set,
// and each has to equal the prefix byte at the same position, ignoring ASCII case.
// A prefix that is too short to have a byte at `pos` counts as mismatch.
return false;
}
pos += 1;
}
is_done_ntfs(input.get(pos..))
}
}
/// Check if trailing filename bytes leave a match to special files like `.git` unchanged in NTFS.
///
/// That is the case if `input` is `None`, or if skipping all leading spaces and dots leads
/// to the end of `input` or to a colon.
fn is_done_ntfs(input: Option<&[u8]>) -> bool {
    let Some(rest) = input else {
        return true;
    };
    // The first byte that is neither space nor dot decides the outcome.
    match rest.iter().find(|&&byte| byte != b' ' && byte != b'.') {
        None | Some(b':') => true,
        Some(_) => false,
    }
}
/// Check if trailing filename bytes leave a match to Windows reserved device names unchanged.
///
/// That is the case if `input` is `None`, or if skipping all leading spaces leads to the end
/// of `input`, to a dot or to a colon.
fn is_done_windows(input: Option<&[u8]>) -> bool {
    let Some(rest) = input else {
        return true;
    };
    // Only the first byte that is not a space matters.
    let first_non_space = rest.iter().find(|&&byte| byte != b' ');
    matches!(first_non_space, None | Some(b'.' | b':'))
}