diff --git a/posix-regex b/posix-regex index eccf84478c7a60a2869bbe438cb1d2507c9f45b2..bf7d93bb7aba406dc135a7a9eb031371e7739fe6 160000 --- a/posix-regex +++ b/posix-regex @@ -1 +1 @@ -Subproject commit eccf84478c7a60a2869bbe438cb1d2507c9f45b2 +Subproject commit bf7d93bb7aba406dc135a7a9eb031371e7739fe6 diff --git a/src/header/fnmatch/mod.rs b/src/header/fnmatch/mod.rs index 899dbf7b7c797519e1158452a92adad5333ec586..6d3800ad13198b3c58700db6b2c02e5b967de5d6 100644 --- a/src/header/fnmatch/mod.rs +++ b/src/header/fnmatch/mod.rs @@ -1,8 +1,14 @@ //! fnmatch implementation +use alloc::borrow::Cow; use alloc::vec::Vec; +use core::slice; use platform::types::*; +use posix_regex::PosixRegex; +use posix_regex::compile::{Collation, Token, Range}; + +const ONCE: Range = Range(1, Some(1)); pub const FNM_NOMATCH: c_int = 1; @@ -10,82 +16,105 @@ pub const FNM_NOESCAPE: c_int = 1; pub const FNM_PATHNAME: c_int = 2; pub const FNM_PERIOD: c_int = 4; pub const FNM_CASEFOLD: c_int = 8; +// TODO: FNM_EXTMATCH -#[derive(Debug)] -enum Token { - Any, - Char(u8), - Match(bool, Vec<u8>), - Wildcard, - // TODO: FNM_EXTMATCH, which is basically a whole another custom regex - // format that's ambigious and ugh. The C standard library is really bloaty - // and I sure hope we can get away with delaying this as long as possible. - // If you need to implement this, you can contact jD91mZM2 for assistance - // in reading this code in case it's ugly. -} - -unsafe fn next_token(pattern: &mut *const c_char, flags: c_int) -> Option<Token> { - let c = **pattern as u8; - if c == 0 { - return None; - } - *pattern = pattern.offset(1); - Some(match c { - b'\\' if flags & FNM_NOESCAPE == FNM_NOESCAPE => { - let c = **pattern as u8; - if c == 0 { - // Trailing backslash. Maybe error here? - return None; - } - *pattern = pattern.offset(1); - Token::Char(c) +unsafe fn tokenize(mut pattern: *const u8, flags: c_int) -> Vec<(Token, Range)> { + fn any(leading: bool, flags: c_int) -> Token { + let mut list = Vec::new(); + if flags & FNM_PATHNAME == FNM_PATHNAME { + list.push(Collation::Char(b'/')) + } + if leading && flags & FNM_PERIOD == FNM_PERIOD { + list.push(Collation::Char(b'.')) } - b'?' => Token::Any, - b'*' => Token::Wildcard, - b'[' => { - let mut matches = Vec::new(); - let invert = if **pattern as u8 == b'!' { - *pattern = pattern.offset(1); - true - } else { - false - }; + Token::OneOf { invert: true, list } + } + fn can_push(leading: bool, flags: c_int, c: u8) -> bool { + (c != b'/' || flags & FNM_PATHNAME != FNM_PATHNAME) + && (c != b'.' || !leading || flags & FNM_PERIOD != FNM_PERIOD) + } + fn is_leading(flags: c_int, c: u8) -> bool { + c == b'/' && flags & FNM_PATHNAME == FNM_PATHNAME + } + + let mut tokens = Vec::new(); + let mut leading = true; + + while *pattern != 0 { + let was_leading = leading; + leading = false; - loop { - let mut c = **pattern as u8; + let c = *pattern; + pattern = pattern.offset(1); + + tokens.push(match c { + b'\\' if flags & FNM_NOESCAPE == FNM_NOESCAPE => { + let c = *pattern; if c == 0 { + // Trailing backslash. Maybe error here? break; } - *pattern = pattern.offset(1); - match c { - b']' => break, - b'\\' => { - c = **pattern as u8; - *pattern = pattern.offset(1); - if c == 0 { - // Trailing backslash. Maybe error? - break; + pattern = pattern.offset(1); + leading = is_leading(flags, c); + (Token::Char(c), ONCE) + }, + b'?' => (any(was_leading, flags), ONCE), + b'*' => (any(was_leading, flags), Range(0, None)), + b'[' => { + let mut list: Vec<Collation> = Vec::new(); + let invert = if *pattern == b'!' { + pattern = pattern.offset(1); + true + } else { + false + }; + + loop { + let mut c = *pattern; + if c == 0 { + break; + } + pattern = pattern.offset(1); + match c { + b']' => break, + b'\\' => { + c = *pattern; + pattern = pattern.offset(1); + if c == 0 { + // Trailing backslash. Maybe error? + break; + } } + _ => (), } - _ => (), - } - if matches.len() >= 2 && matches[matches.len() - 1] == b'-' { - let len = matches.len(); - let start = matches[len - 2]; - matches.drain(len - 2..); - // Exclusive range because we'll push C later - for c in start..c { - matches.push(c); + if *pattern == b'-' && *pattern.offset(1) != 0 { + let end = *pattern.offset(1); + pattern = pattern.offset(2); + for c in c..=end { + if can_push(was_leading, flags, c) { + list.push(Collation::Char(c)); + } + } + } else { + if can_push(was_leading, flags, c) { + list.push(Collation::Char(c)); + } } } - matches.push(c); - } - // Otherwise, there was no closing ]. Maybe error? + // Otherwise, there was no closing ]. Maybe error? - Token::Match(invert, matches) - } - c => Token::Char(c), - }) + (Token::OneOf { + invert, + list + }, ONCE) + } + c => { + leading = is_leading(flags, c); + (Token::Char(c), ONCE) + } + }) + } + tokens } #[no_mangle] @@ -94,75 +123,20 @@ pub unsafe extern "C" fn fnmatch( mut input: *const c_char, flags: c_int, ) -> c_int { - let pathname = flags & FNM_PATHNAME == FNM_PATHNAME; - let casefold = flags & FNM_CASEFOLD == FNM_CASEFOLD; - - let mut leading = true; - - loop { - if *input == 0 { - return if *pattern == 0 { 0 } else { FNM_NOMATCH }; - } - if leading && flags & FNM_PERIOD == FNM_PERIOD { - if *input as u8 == b'.' && *pattern as u8 != b'.' { - return FNM_NOMATCH; - } - } - leading = false; - match next_token(&mut pattern, flags) { - Some(Token::Any) => { - if pathname && *input as u8 == b'/' { - return FNM_NOMATCH; - } - input = input.offset(1); - } - Some(Token::Char(c)) => { - let mut a = *input as u8; - if casefold && a >= b'a' && a <= b'z' { - a -= b'a' - b'A'; - } - let mut b = c; - if casefold && b >= b'a' && b <= b'z' { - b -= b'a' - b'A'; - } - if a != b { - return FNM_NOMATCH; - } - if pathname && a == b'/' { - leading = true; - } - input = input.offset(1); - } - Some(Token::Match(invert, matches)) => { - if (pathname && *input as u8 == b'/') || matches.contains(&(*input as u8)) == invert - { - // Found it, but it's inverted! Or vise versa. - return FNM_NOMATCH; - } - input = input.offset(1); - } - Some(Token::Wildcard) => { - loop { - let c = *input as u8; - if c == 0 { - return if *pattern == 0 { 0 } else { FNM_NOMATCH }; - } + let mut len = 0; + while *input.offset(len) != 0 { + len += 1; + } + let input = slice::from_raw_parts(input as *const u8, len as usize); - let ret = fnmatch(pattern, input, flags); - if ret == FNM_NOMATCH { - input = input.offset(1); - } else { - // Either an error or a match. Forward the return. - return ret; - } + let mut tokens = tokenize(pattern as *const u8, flags); + tokens.push((Token::End, ONCE)); - if pathname && c == b'/' { - // End of segment, no match yet - return FNM_NOMATCH; - } - } - } - None => return FNM_NOMATCH, // Pattern ended but there's still some input - } + if PosixRegex::new(Cow::Owned(vec![tokens])) + .case_insensitive(flags & FNM_CASEFOLD == FNM_CASEFOLD) + .matches_exact(input).is_some() { + 0 + } else { + FNM_NOMATCH } }