diff --git a/src/parser/arguments.rs b/src/parser/arguments.rs
index 1d788182c68f4abb50a0794941528d622e394599..5ab1d55c4d0f711ce7e8b94dbf0b0252e6de2a83 100644
--- a/src/parser/arguments.rs
+++ b/src/parser/arguments.rs
@@ -3,6 +3,9 @@ const SINGLE: u8 = 2;
 const BACK: u8 = 4;
 const COMM_1: u8 = 8;
 const COMM_2: u8 = 16;
+const VARIAB: u8 = 32; // set by an unquoted `$`
+const ARRAY: u8 = 64; // set by an unquoted `@`
+const METHOD: u8 = 128; // set between a method's `(` and `)`; spaces do not split while set
 
 /// An efficient `Iterator` structure for splitting arguments
 pub struct ArgumentSplitter<'a> {
@@ -35,13 +38,13 @@ impl<'a> Iterator for ArgumentSplitter<'a> {
                 b'\\' => self.flags ^= BACK,
                 b'@' if self.flags & SINGLE == 0 => {
                     self.flags &= 255 ^ COMM_1;
-                    self.flags |= COMM_2;
+                    self.flags |= COMM_2 | ARRAY;
                     self.buffer.push(character);
                     continue
                 },
                 b'$' if self.flags & SINGLE == 0 => {
                     self.flags &= 255 ^ COMM_2;
-                    self.flags |= COMM_1;
+                    self.flags |= COMM_1 | VARIAB;
                     self.buffer.push(character);
                     continue
                 },
@@ -50,10 +53,17 @@ impl<'a> Iterator for ArgumentSplitter<'a> {
                 b']' if self.flags & SINGLE == 0 && array_level != 0 => array_level -= 1,
                 b']' if self.flags & SINGLE == 0 => array_process_level -= 1,
                 b'(' if self.flags & SINGLE == 0 && self.flags & COMM_1 != 0 => level += 1,
+                b'(' if self.flags & SINGLE == 0 && self.flags & (VARIAB | ARRAY) != 0 => {
+                    self.flags &= !(VARIAB | ARRAY);
+                    self.flags |= METHOD;
+                },
+                b')' if self.flags & SINGLE == 0 && self.flags & METHOD != 0 => {
+                    self.flags &= !METHOD;
+                },
                 b')' if self.flags & SINGLE == 0 => level -= 1,
                 b'"' if self.flags & SINGLE == 0 => self.flags ^= DOUBLE,
                 b'\'' if self.flags & DOUBLE == 0 => self.flags ^= SINGLE,
-                b' ' if !self.buffer.is_empty() && (self.flags & (SINGLE + DOUBLE) == 0)
+                b' ' if !self.buffer.is_empty() && (self.flags & (SINGLE | DOUBLE | METHOD) == 0)
                     && level == 0 && array_level == 0 && array_process_level == 0 => break,
                 _ => ()
             }
@@ -71,3 +81,45 @@ impl<'a> Iterator for ArgumentSplitter<'a> {
         }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Splits `input` and checks that it yields exactly the `expected` arguments, in order.
+    fn compare(input: &str, expected: Vec<&str>) {
+        let actual: Vec<String> = ArgumentSplitter::new(input).collect();
+        for (wanted, got) in expected.iter().zip(&actual) {
+            assert_eq!(*wanted, got.as_str());
+        }
+        assert_eq!(expected.len(), actual.len());
+    }
+
+    #[test]
+    fn methods() {
+        let line = "echo $join(array, ', ') @split(var, ', ')";
+        let tokens = vec!["echo", "$join(array, ', ')", "@split(var, ', ')"];
+        compare(line, tokens);
+    }
+
+    #[test]
+    fn processes() {
+        let line = "echo $(echo one $(echo two)) @[echo one @[echo two]]";
+        let tokens = vec!["echo", "$(echo one $(echo two))", "@[echo one @[echo two]]"];
+        compare(line, tokens);
+    }
+
+    #[test]
+    fn arrays() {
+        let line = "echo [ one two @[echo three four] five ] [ six seven ]";
+        let tokens = vec!["echo", "[ one two @[echo three four] five ]", "[ six seven ]"];
+        compare(line, tokens);
+    }
+
+    #[test]
+    fn quotes() {
+        let line = "echo 'one two \"three four\"' \"five six 'seven eight'\"";
+        let tokens = vec!["echo", "'one two \"three four\"'", "\"five six 'seven eight'\""];
+        compare(line, tokens);
+    }
+}
diff --git a/src/parser/pipelines.rs b/src/parser/pipelines.rs
index 9dbb15c69bec9196d246372c6fec1ac414b86219..fefab9e7a008e5f61f2d1434ab2e7a585deb733a 100644
--- a/src/parser/pipelines.rs
+++ b/src/parser/pipelines.rs
@@ -1,7 +1,7 @@
 #![allow(eq_op)] // Required as a macro sets this clippy warning off.
 
// TODO: -// - Rewrite this module +// - Rewrite this module like the shell_expand::words module // - Implement Herestrings // - Implement Heredocs // - Fix the cyclomatic complexity issue @@ -14,11 +14,15 @@ const SINGLE_QUOTE: u8 = 2; const DOUBLE_QUOTE: u8 = 4; const WHITESPACE: u8 = 8; const ARRAY_PROCESS: u8 = 16; +const METHOD: u8 = 32; const PROCESS_ONE: u8 = 64; const PROCESS_TWO: u8 = 128; +const ARRAY: u8 = 1; +const VARIABLE: u8 = 2; + // Only valid if `SINGLE_QUOTE` and `DOUBLE_QUOTE` are not enabled -const PROCESS_VAL: u8 = 255 ^ (BACKSLASH + WHITESPACE + 32); +const PROCESS_VAL: u8 = 255 ^ (BACKSLASH + WHITESPACE + METHOD); // Determines if the character is not quoted and isn't process matched. `flags & IS_VALID` returns 0 if true const IS_VALID: u8 = 255 ^ (BACKSLASH + WHITESPACE); @@ -51,6 +55,7 @@ pub fn collect(possible_error: &mut Option<&str>, args: &str) -> Pipeline { let mut args_iter = args.bytes().peekable(); let (mut index, mut arg_start) = (0, 0); let mut flags = 0u8; // (backslash, single_quote, double_quote, x, x, x, process_one, process_two) + let mut flags_ext = 0u8; let mut arguments: Vec<String> = Vec::new(); @@ -118,10 +123,14 @@ pub fn collect(possible_error: &mut Option<&str>, args: &str) -> Pipeline { b'\\' => flags ^= BACKSLASH, b'@' => { flags |= ARRAY_PROCESS; + flags_ext |= ARRAY; index += 1; continue }, - b'$' if flags & PROCESS_VAL == 0 => flags |= PROCESS_ONE, + b'$' if flags & PROCESS_VAL == 0 => { + flags |= PROCESS_ONE; + flags_ext |= VARIABLE; + }, b'[' if flags & ARRAY_PROCESS != 0 => array_process_levels += 1, b'[' => array_levels += 1, b']' if array_levels != 0 => array_levels -= 1, @@ -131,6 +140,13 @@ pub fn collect(possible_error: &mut Option<&str>, args: &str) -> Pipeline { flags |= PROCESS_TWO; levels += 1; }, + b'(' if flags_ext & (VARIABLE + ARRAY) != 0 => { + flags |= METHOD; + flags_ext &= 255 ^ (VARIABLE + ARRAY); + }, + b')' if levels == 0 && flags & METHOD != 0 && flags & SINGLE_QUOTE == 0 => { + flags 
&= 255 ^ METHOD; + } b')' if flags & PROCESS_VAL == PROCESS_TWO => { levels -= 0; if levels == 0 { flags &= 255 ^ PROCESS_TWO; } @@ -345,6 +361,16 @@ mod tests { } } + #[test] + fn methods() { + if let Statement::Pipeline(pipeline) = parse("echo @split(var, ', ') $join(array, ',')") { + let jobs = pipeline.jobs; + assert_eq!("echo", jobs[0].args[0]); + assert_eq!("@split(var, ', ')", jobs[0].args[1]); + assert_eq!("$join(array, ',')", jobs[0].args[2]); + } + } + #[test] fn subshells_within_subshells() { if let Statement::Pipeline(pipeline) = parse("echo $(echo one $(echo two) three)") { diff --git a/src/parser/shell_expand/mod.rs b/src/parser/shell_expand/mod.rs index b505067fb293727665c963973337e471ac93f674..c00870811e0214dea7915cb3fe8b7d5aff19c6c9 100644 --- a/src/parser/shell_expand/mod.rs +++ b/src/parser/shell_expand/mod.rs @@ -12,6 +12,8 @@ use self::words::{WordIterator, WordToken}; pub use self::words::{Index, IndexPosition}; +use std::io::{self, Write}; + pub struct ExpanderFunctions<'f> { pub tilde: &'f Fn(&str) -> Option<String>, pub array: &'f Fn(&str, Index) -> Option<Vec<String>>, @@ -161,6 +163,19 @@ pub fn expand_string(original: &str, expand_func: &ExpanderFunctions, reverse_qu } } }, + WordToken::StringMethod(method, variable, pattern) => { + let pattern = &expand_string(pattern, expand_func, false).join(" "); + match method { + "join" => if let Some(array) = (expand_func.array)(variable, Index::All) { + current.push_str(&array.join(pattern)); + }, + _ => { + let stderr = io::stderr(); + let mut stderr = stderr.lock(); + let _ = writeln!(stderr, "ion: invalid string method: {}", method); + } + } + }, WordToken::Brace(nodes) => expand_brace(&mut current, &mut expanders, &mut tokens, nodes, expand_func, reverse_quoting), WordToken::Normal(text) => current.push_str(text), @@ -292,6 +307,19 @@ pub fn expand_string(original: &str, expand_func: &ExpanderFunctions, reverse_qu }, } }, + WordToken::StringMethod(method, variable, pattern) => { + let pattern 
= &expand_string(pattern, expand_func, false).join(" "); + match method { + "join" => if let Some(array) = (expand_func.array)(variable, Index::All) { + output.push_str(&array.join(pattern)); + }, + _ => { + let stderr = io::stderr(); + let mut stderr = stderr.lock(); + let _ = writeln!(stderr, "ion: invalid string method: {}", method); + } + } + }, WordToken::Brace(_) => unreachable!(), WordToken::Normal(text) | WordToken::Whitespace(text) => { output.push_str(text); diff --git a/src/parser/shell_expand/words.rs b/src/parser/shell_expand/words.rs index 8a7d2657dbe713c86f0a7b4b2fe6f1cb422527bd..6625d7e8ffdc93993ba145f62fabbfc9f080f2eb 100644 --- a/src/parser/shell_expand/words.rs +++ b/src/parser/shell_expand/words.rs @@ -65,7 +65,7 @@ pub enum WordToken<'a> { ArrayVariable(&'a str, bool, Index), ArrayProcess(&'a str, bool, Index), Process(&'a str, bool), - // ArrayToString(&'a str, &'a str, &'a str, bool), + StringMethod(&'a str, &'a str, &'a str), // StringToArray(&'a str, &'a str, &'a str, bool), } @@ -137,14 +137,33 @@ impl<'a> WordIterator<'a> { fn variable<I>(&mut self, iterator: &mut I) -> WordToken<'a> where I: Iterator<Item = u8> { - let start = self.read; + let mut start = self.read; self.read += 1; while let Some(character) = iterator.next() { match character { - // If found, this is not a `Variable` but an `ArrayToString` - // b'(' => { - // unimplemented!() - // }, + b'(' => { + let method = &self.data[start..self.read]; + self.read += 1; + start = self.read; + while let Some(character) = iterator.next() { + if character == b',' { + let variable = &self.data[start..self.read]; + self.read += 1; + start = self.read; + while let Some(character) = iterator.next() { + if character == b')' { + let pattern = &self.data[start..self.read].trim(); + self.read += 1; + return WordToken::StringMethod(method, variable, pattern) + } + self.read += 1; + } + } + self.read += 1; + } + + panic!("ion: fatal error with syntax validation parsing: unterminated method"); + } 
// Only alphanumerical and underscores are allowed in variable names 0...47 | 58...64 | 91...94 | 96 | 123...127 => { return WordToken::Variable(&self.data[start..self.read], self.flags & DQUOTE != 0); @@ -186,6 +205,9 @@ impl<'a> WordIterator<'a> { while let Some(character) = iterator.next() { match character { // TODO: ArrayFunction + // b'(' => { + // let variable = + // } b'[' => { return WordToken::ArrayVariable ( &self.data[start..self.read], @@ -513,6 +535,17 @@ mod tests { assert_eq!(expected.len(), correct); } + #[test] + fn string_method() { + let input = "$join(array, 'pattern') $join(array, 'pattern')"; + let expected = vec![ + WordToken::StringMethod("join", "array", "'pattern'"), + WordToken::Whitespace(" "), + WordToken::StringMethod("join", "array", "'pattern'") + ]; + compare(input, expected); + } + #[test] fn escape_with_backslash() { let input = "\\$FOO\\$BAR \\$FOO"; diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 599e3e967b4ccc375f095216e79a2046188c7327..3079031ac89d210d6ae7d90bad96a61dfa21f1e0 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -1,13 +1,20 @@ +// TODO: Rewrite this in the same style as shell_expand::words. 
+
+use std::u16;
 use std::io::{self, Write};
 use flow_control::Statement;
 use super::peg::parse;
 
-const SQUOTE: u8 = 1;
-const DQUOTE: u8 = 2;
-const BACKSL: u8 = 4;
-const COMM_1: u8 = 8;
-const COMM_2: u8 = 16;
-const VBRACE: u8 = 32;
+const SQUOTE: u16 = 1;
+const DQUOTE: u16 = 2;
+const BACKSL: u16 = 4;
+const COMM_1: u16 = 8;
+const COMM_2: u16 = 16;
+const VBRACE: u16 = 32;
+const ARRAY: u16 = 64;
+const VARIAB: u16 = 128;
+const METHOD: u16 = 256;
+
 
 #[derive(Debug, PartialEq)]
 pub enum StatementError {
@@ -15,6 +22,7 @@ pub enum StatementError {
     UnterminatedSubshell,
     UnterminatedBracedVar,
     UnterminatedBrace,
+    UnterminatedMethod,
 }
 
 pub fn check_statement(statement: Result<&str, StatementError>) -> Option<Statement> {
@@ -36,6 +44,9 @@ pub fn check_statement(statement: Result<&str, StatementError>) -> Option<Statem
                 },
                 StatementError::UnterminatedBracedVar => {
                     let _ = writeln!(stderr.lock(), "ion: syntax error: unterminated braced var");
+                },
+                StatementError::UnterminatedMethod => {
+                    let _ = writeln!(stderr.lock(), "ion: syntax error: unterminated method");
                 }
             }
             None
@@ -46,7 +57,7 @@ pub fn check_statement(statement: Result<&str, StatementError>) -> Option<Statem
 pub struct StatementSplitter<'a> {
     data: &'a str,
     read: usize,
-    flags: u8,
+    flags: u16,
     array_level: u8,
     array_process_level: u8,
     process_level: u8,
@@ -85,13 +96,13 @@ impl<'a> Iterator for StatementSplitter<'a> {
                 b'\'' if self.flags & DQUOTE == 0 => self.flags ^= SQUOTE,
                 b'"' if self.flags & SQUOTE == 0 => self.flags ^= DQUOTE,
                 b'@' if self.flags & SQUOTE == 0 => {
-                    self.flags &= 255 ^ COMM_1;
-                    self.flags |= COMM_2;
+                    self.flags &= u16::MAX ^ COMM_1;
+                    self.flags |= COMM_2 + ARRAY;
                     continue
                 }
                 b'$' if self.flags & SQUOTE == 0 => {
-                    self.flags &= 255 ^ COMM_2;
-                    self.flags |= COMM_1;
+                    self.flags &= u16::MAX ^ COMM_2;
+                    self.flags |= COMM_1 + VARIAB;
                     continue
                 },
                 b'{' if self.flags & COMM_1 != 0 => self.flags |= VBRACE,
@@ -106,12 +117,20 @@ impl<'a> Iterator for StatementSplitter<'a> {
                     }
                 },
                 b'}' if self.flags & VBRACE != 0 => self.flags ^= VBRACE,
-                b'(' if self.flags & COMM_1 == 0 => {
+                b'(' if self.flags & (COMM_1 + VARIAB + ARRAY) == 0 => {
                     if error.is_none() {
                         error = Some(StatementError::InvalidCharacter(character as char, self.read))
                     }
                 },
-                b'[' if self.flags & COMM_2 != 0 && self.flags & SQUOTE == 0 => {
+                b'(' if self.flags & COMM_1 != 0 => {
+                    self.process_level += 1;
+                    self.flags &= u16::MAX ^ (VARIAB + ARRAY);
+                },
+                b'(' if self.flags & (VARIAB + ARRAY) != 0 => {
+                    self.flags &= u16::MAX ^ (VARIAB + ARRAY);
+                    self.flags |= METHOD;
+                },
+                b'[' if self.flags & COMM_2 != 0 => {
                     self.array_process_level += 1;
                 },
                 b'[' if self.flags & SQUOTE == 0 => self.array_level += 1,
@@ -122,22 +141,22 @@ impl<'a> Iterator for StatementSplitter<'a> {
                 },
                 b']' if self.flags & SQUOTE == 0 && self.array_level != 0 => self.array_level -= 1,
                 b']' if self.flags & SQUOTE == 0 => self.array_process_level -= 1,
-                b'(' if self.flags & COMM_1 != 0 && self.flags & SQUOTE == 0 => {
-                    self.process_level += 1;
+                b')' if self.flags & SQUOTE == 0 && self.flags & METHOD != 0 => {
+                    self.flags ^= METHOD;
                 },
-                b')' if self.process_level == 0 && self.flags & SQUOTE == 0 => {
+                b')' if self.process_level == 0 && self.array_level == 0 && self.flags & SQUOTE == 0 => {
                     if error.is_none() {
                         error = Some(StatementError::InvalidCharacter(character as char, self.read))
                     }
                 },
-                b')' if self.flags & SQUOTE == 0 => self.process_level -= 1,
+                b')' if self.flags & SQUOTE == 0 && self.process_level != 0 => self.process_level -= 1, // no underflow for a `)` inside an array
-                b';' if (self.flags & (SQUOTE + DQUOTE) == 0) && self.process_level == 0 => {
+                b';' if (self.flags & (SQUOTE + DQUOTE) == 0) && self.process_level == 0 && self.array_process_level == 0 => {
                     return match error {
                         Some(error) => Some(Err(error)),
                         None => Some(Ok(self.data[start..self.read-1].trim()))
                     };
                 },
-                b'#' if self.flags & (SQUOTE + DQUOTE) == 0 && self.process_level == 0 => {
+                b'#' if self.flags & (SQUOTE + DQUOTE) == 0 && self.process_level == 0 && self.array_process_level == 0 => {
                     let output = self.data[start..self.read-1].trim();
                     self.read = self.data.len();
                     return match error {
@@ -147,7 +166,7 @@ impl<'a> Iterator for StatementSplitter<'a> {
                 },
                 _ => ()
             }
-            self.flags &= 255 ^ (COMM_1 + COMM_2);
+            self.flags &= u16::MAX ^ (COMM_1 + COMM_2);
         }
 
         if start == self.read {
@@ -161,6 +180,7 @@ impl<'a> Iterator for StatementSplitter<'a> {
                 {
                     Some(Err(StatementError::UnterminatedSubshell))
                 },
+                None if self.flags & METHOD != 0 => Some(Err(StatementError::UnterminatedMethod)),
                 None if self.flags & VBRACE != 0 => Some(Err(StatementError::UnterminatedBracedVar)),
                 None if self.brace_level != 0 => Some(Err(StatementError::UnterminatedBrace)),
                 None => Some(Ok(self.data[start..].trim()))
@@ -170,18 +190,27 @@ impl<'a> Iterator for StatementSplitter<'a> {
 }
 
 #[test]
-fn statements_with_syntax_errors() {
+fn syntax_errors() {
     let command = "echo (echo one); echo $((echo one); echo ) two; echo $(echo one";
     let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
-    assert_eq!(results.len(), 4);
     assert_eq!(results[0], Err(StatementError::InvalidCharacter('(', 6)));
     assert_eq!(results[1], Err(StatementError::InvalidCharacter('(', 25)));
     assert_eq!(results[2], Err(StatementError::InvalidCharacter(')', 42)));
     assert_eq!(results[3], Err(StatementError::UnterminatedSubshell));
+    assert_eq!(results.len(), 4);
 }
 
 #[test]
-fn statements_with_processes() {
+fn methods() {
+    let command = "echo $join(array, ', '); echo @join(var, ', ')";
+    let statements = StatementSplitter::new(command).collect::<Vec<_>>();
+    assert_eq!(statements[0], Ok("echo $join(array, ', ')"));
+    assert_eq!(statements[1], Ok("echo @join(var, ', ')"));
+    assert_eq!(statements.len(), 2);
+}
+
+#[test]
+fn processes() {
     let command = "echo $(seq 1 10); echo $(seq 1 10)";
     for statement in StatementSplitter::new(command) {
         assert_eq!(statement, Ok("echo $(seq 1 10)"));
@@ -189,7 +218,15 @@ fn statements_with_processes() {
 }
 
 #[test]
-fn statements_process_with_statements() {
+fn array_processes() {
+    let command = "echo @[echo one; sleep 1]; echo @[echo one; sleep 1]";
+    for statement in StatementSplitter::new(command) {
+        assert_eq!(statement, Ok("echo @[echo one; sleep 1]"));
+    }
+}
+
+#[test]
+fn process_with_statements() {
     let command = "echo $(seq 1 10; seq 1 10)";
     for statement in StatementSplitter::new(command) {
         assert_eq!(statement, Ok(command));
@@ -197,7 +234,7 @@ fn statements_process_with_statements() {
 }
 
 #[test]
-fn statements_with_quotes() {
+fn quotes() {
     let command = "echo \"This ;'is a test\"; echo 'This ;\" is also a test'";
     let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
     assert_eq!(results.len(), 2);
@@ -206,7 +243,7 @@ fn statements_with_quotes() {
 }
 
 #[test]
-fn statements_with_comments() {
+fn comments() {
     let command = "echo $(echo one # two); echo three # four";
     let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
     assert_eq!(results.len(), 2);
@@ -215,7 +252,7 @@ fn statements_with_comments() {
 }
 
 #[test]
-fn statements_with_process_recursion() {
+fn nested_process() {
     let command = "echo $(echo one $(echo two) three)";
     let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
     assert_eq!(results.len(), 1);
@@ -226,3 +263,25 @@ fn statements_with_process_recursion() {
     assert_eq!(results.len(), 1);
     assert_eq!(results[0], Ok(command));
 }
+
+#[test]
+fn nested_array_process() {
+    let command = "echo @[echo one @[echo two] three]";
+    let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0], Ok(command));
+
+    let command = "echo @[echo @[echo one; echo two]; echo two]";
+    let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0], Ok(command));
+}
+
+#[test]
+fn parenthesis_in_array() {
+    // A `)` inside an array with no open subshell must not underflow `process_level`.
+    let command = "echo [ one) two ]";
+    let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>();
+    assert_eq!(results.len(), 1);
+    assert_eq!(results[0], Ok(command));
+}