diff --git a/.gitignore b/.gitignore index ef090e899bb45834f9b1d591d4491f5b84bf1ac4..5de4fc4f57d956f2e739c22113c4db9f507fec84 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ +.* +cachegrind.* +perf.* target manual/book diff --git a/src/parser/arguments.rs b/src/parser/arguments.rs index 02c92beb376c8dafded8ca08a2440bc3688f9509..20a84ca104e501e85a928ec9a7f3611f3874a94a 100644 --- a/src/parser/arguments.rs +++ b/src/parser/arguments.rs @@ -1,11 +1,9 @@ const DOUBLE: u8 = 1; -const SINGLE: u8 = 2; -const BACK: u8 = 4; -const COMM_1: u8 = 8; -const COMM_2: u8 = 16; -const VARIAB: u8 = 32; -const ARRAY: u8 = 64; -const METHOD: u8 = 128; +const COMM_1: u8 = 2; +const COMM_2: u8 = 4; +const VARIAB: u8 = 8; +const ARRAY: u8 = 16; +const METHOD: u8 = 32; /// An efficient `Iterator` structure for splitting arguments pub(crate) struct ArgumentSplitter<'a> { @@ -24,54 +22,79 @@ impl<'a> ArgumentSplitter<'a> { } } +impl<'a> ArgumentSplitter<'a> { + fn scan_singlequotes<B: Iterator<Item = u8>>(&mut self, bytes: &mut B) { + while let Some(character) = bytes.next() { + match character { + b'\\' => { + self.read += 2; + let _ = bytes.next(); + continue; + } + b'\'' => break, + _ => (), + } + self.read += 1; + } + } +} + impl<'a> Iterator for ArgumentSplitter<'a> { type Item = &'a str; fn next(&mut self) -> Option<&'a str> { - while let Some(&b' ') = self.data.as_bytes().get(self.read) { + let data = self.data.as_bytes(); + while let Some(&b' ') = data.get(self.read) { self.read += 1; } let start = self.read; - let (mut level, mut array_level, mut array_process_level) = (0, 0, 0); - for character in self.data.bytes().skip(self.read) { + let (mut level, mut alevel) = (0, 0); + let mut bytes = data.iter().cloned().skip(self.read); + while let Some(character) = bytes.next() { match character { - _ if self.flags & BACK != 0 => self.flags ^= BACK, - b'\\' => self.flags ^= BACK, - b'@' if self.flags & SINGLE == 0 => { - self.flags &= 255 ^ COMM_1; - self.flags |= COMM_2 + ARRAY; - self.read += 1; + // Skip the next byte. + b'\\' => { + self.read += 2; + let _ = bytes.next(); continue; } - b'$' if self.flags & SINGLE == 0 => { - self.flags &= 255 ^ COMM_2; - self.flags |= COMM_1 + VARIAB; + // Disable COMM_1 and enable COMM_2 + ARRAY. + b'@' => { + self.flags = (self.flags & (255 ^ COMM_1)) | (COMM_2 + ARRAY); self.read += 1; continue; } - b'[' if self.flags & SINGLE == 0 && self.flags & COMM_2 != 0 => { - array_process_level += 1 - } - b'[' if self.flags & SINGLE == 0 => array_level += 1, - b']' if self.flags & SINGLE == 0 && array_level != 0 => array_level -= 1, - b']' if self.flags & SINGLE == 0 => array_process_level -= 1, - b'(' if self.flags & SINGLE == 0 && self.flags & COMM_1 != 0 => level += 1, - b'(' if self.flags & SINGLE == 0 && self.flags & (VARIAB + ARRAY) != 0 => { - self.flags |= METHOD; - self.flags &= 255 ^ (VARIAB + ARRAY); + // Disable COMM_2 and enable COMM_1 + VARIAB. + b'$' => { + self.flags = (self.flags & (255 ^ COMM_2)) | (COMM_1 + VARIAB); + self.read += 1; + continue; } - b')' if self.flags & SINGLE == 0 && self.flags & METHOD != 0 => { - self.flags &= 255 ^ METHOD; + // Increment the array level + b'[' => alevel += 1, + // Decrement the array level + b']' => alevel -= 1, + // Increment the parenthesis level. + b'(' if self.flags & COMM_1 != 0 => level += 1, + // Disable VARIAB + ARRAY and enable METHOD. + b'(' if self.flags & (VARIAB + ARRAY) != 0 => { + self.flags = (self.flags & (255 ^ (VARIAB + ARRAY))) | METHOD; } - b')' if self.flags & SINGLE == 0 => level -= 1, - b'"' if self.flags & SINGLE == 0 => self.flags ^= DOUBLE, - b'\'' if self.flags & DOUBLE == 0 => self.flags ^= SINGLE, - b' ' if self.flags & (SINGLE + DOUBLE + METHOD) == 0 && level == 0 - && array_level == 0 && array_process_level == 0 => - { - break + // Disable METHOD if enabled. + b')' if self.flags & METHOD != 0 => self.flags ^= METHOD, + // Otherwise decrement the parenthesis level. + b')' => level -= 1, + // Toggle double quote rules. + b'"' => self.flags ^= DOUBLE, + // Loop through characters until single quote rules are completed. + b'\'' if self.flags & DOUBLE == 0 => { + self.scan_singlequotes(&mut bytes); + self.read += 2; + continue; } + // Break from the loop once a root-level space is found. + b' ' if (self.flags & (DOUBLE + METHOD)) + level + alevel == 0 => break, _ => (), } self.read += 1; diff --git a/src/parser/assignments/splitter.rs b/src/parser/assignments/splitter.rs index f68a1c0823ec123f253d1e62c465493f4c6c81e6..4bb6e4603f759298d02ac0d9b1c1eb65e8632a3d 100644 --- a/src/parser/assignments/splitter.rs +++ b/src/parser/assignments/splitter.rs @@ -19,7 +19,7 @@ pub(crate) fn split_assignment<'a>( start = read; read += 1; break; - } else if [b'+', b'-', b'/', b'*'].contains(&byte) { + } else if is_operator(byte) { start = read; read += 1; while let Some(byte) = bytes.next() { @@ -48,6 +48,8 @@ pub(crate) fn split_assignment<'a>( (Some(keys), Some(operator), Some(values.trim())) } +fn is_operator(byte: u8) -> bool { byte == b'+' || byte == b'-' || byte == b'*' || byte == b'/' } + #[cfg(test)] mod tests { use super::*;