From f7e67728d22a3721820a9587e790f8f52ef22dfb Mon Sep 17 00:00:00 2001 From: Michael Aaron Murphy <mmstickman@gmail.com> Date: Sun, 19 Mar 2017 12:54:51 -0400 Subject: [PATCH] Validate Syntax in StatementSplitter Each statement will now be validated for correct syntax. When an error is found, a descriptive error will be printed to standard error and that statement will be skipped. The two types of errors that are curretly detected are unterminated subshells and unescaped parenthesis in the wrong place. More types will be added to this logic in the near future, such as unterminated braces or invalid characters in brace logic. This is the first step towards an upcoming brace expansion rewrite that I am planning to perform. --- Cargo.toml | 1 - src/parser/mod.rs | 2 +- src/parser/statements.rs | 85 +++++++++++++++++++++++++++++----------- src/shell/flow.rs | 22 ++++++++++- src/shell/mod.rs | 27 +++++++++++-- 5 files changed, 106 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 09eb39a5..77c1498a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,6 @@ authors = [ [[bin]] name = "ion" - [dependencies] glob = "0.2" liner = "0.1" diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b4e570c5..0c8c8949 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9,7 +9,7 @@ pub mod shell_expand; mod statements; pub use self::loops::for_grammar::ForExpression; -pub use self::statements::StatementSplitter; +pub use self::statements::{StatementSplitter, StatementError}; /// Takes an argument string as input and expands it. pub fn expand_string<'a>(original: &'a str, vars: &Variables, dir_stack: &DirectoryStack) -> Result<String, ExpandErr> { diff --git a/src/parser/statements.rs b/src/parser/statements.rs index 8d43fa43..856b93c2 100644 --- a/src/parser/statements.rs +++ b/src/parser/statements.rs @@ -3,23 +3,31 @@ const DQUOTE: u8 = 2; const BACKSL: u8 = 4; const COMM_1: u8 = 8; +#[derive(Debug, PartialEq)] +pub enum StatementError { + InvalidCharacter(char, usize), + UnterminatedSubshell +} + pub struct StatementSplitter<'a> { data: &'a str, read: usize, flags: u8, + process_level: u8, + // brace_level: u8, } impl<'a> StatementSplitter<'a> { pub fn new(data: &'a str) -> StatementSplitter<'a> { - StatementSplitter { data: data, read: 0, flags: 0 } + StatementSplitter { data: data, read: 0, flags: 0, process_level: 0 } } } impl<'a> Iterator for StatementSplitter<'a> { - type Item = &'a str; - fn next(&mut self) -> Option<&'a str> { + type Item = Result<&'a str, StatementError>; + fn next(&mut self) -> Option<Result<&'a str, StatementError>> { let start = self.read; - let mut levels = 0u8; + let mut error = None; for character in self.data.bytes().skip(self.read) { self.read += 1; match character { @@ -28,15 +36,31 @@ impl<'a> Iterator for StatementSplitter<'a> { b'"' if self.flags & SQUOTE == 0 => self.flags ^= DQUOTE, b'\\' if self.flags & (SQUOTE + DQUOTE) == 0 => self.flags |= BACKSL, b'$' if self.flags & (SQUOTE + DQUOTE) == 0 => { self.flags |= COMM_1; continue }, - b'(' if self.flags & COMM_1 != 0 => levels += 1, - b')' if levels > 0 => levels -= 1, - b';' if (self.flags & (SQUOTE + DQUOTE) == 0) && levels == 0 => { - return Some(self.data[start..self.read-1].trim()) + b'(' if self.flags & COMM_1 == 0 => { + if error.is_none() { + error = Some(StatementError::InvalidCharacter(character as char, self.read)) + } + }, + b'(' if self.flags & COMM_1 != 0 => self.process_level += 1, + b')' if self.process_level == 0 => { + if error.is_none() { + error = Some(StatementError::InvalidCharacter(character as char, self.read)) + } + }, + b')' => self.process_level -= 1, + b';' if (self.flags & (SQUOTE + DQUOTE) == 0) && self.process_level == 0 => { + return match error { + Some(error) => Some(Err(error)), + None => Some(Ok(self.data[start..self.read-1].trim())) + }; }, - b'#' if (self.flags & (SQUOTE + DQUOTE) == 0) && levels == 0 => { + b'#' if (self.flags & (SQUOTE + DQUOTE) == 0) && self.process_level == 0 => { let output = self.data[start..self.read-1].trim(); self.read = self.data.len(); - return Some(output); + return match error { + Some(error) => Some(Err(error)), + None => Some(Ok(output)) + }; }, _ => () } @@ -47,16 +71,31 @@ impl<'a> Iterator for StatementSplitter<'a> { None } else { self.read = self.data.len(); - Some(self.data[start..].trim()) + match error { + Some(error) => Some(Err(error)), + None if self.process_level != 0 => Some(Err(StatementError::UnterminatedSubshell)), + None => Some(Ok(self.data[start..].trim())) + } } } } +#[test] +fn statements_with_syntax_errors() { + let command = "echo (echo one); echo $((echo one); echo ) two; echo $(echo one"; + let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>(); + assert_eq!(results.len(), 4); + assert_eq!(results[0], Err(StatementError::InvalidCharacter('(', 6))); + assert_eq!(results[1], Err(StatementError::InvalidCharacter('(', 25))); + assert_eq!(results[2], Err(StatementError::InvalidCharacter(')', 42))); + assert_eq!(results[3], Err(StatementError::UnterminatedSubshell)); +} + #[test] fn statements_with_processes() { let command = "echo $(seq 1 10); echo $(seq 1 10)"; for statement in StatementSplitter::new(command) { - assert_eq!(statement, "echo $(seq 1 10)"); + assert_eq!(statement, Ok("echo $(seq 1 10)")); } } @@ -64,37 +103,37 @@ fn statements_with_processes() { fn statements_process_with_statements() { let command = "echo $(seq 1 10; seq 1 10)"; for statement in StatementSplitter::new(command) { - assert_eq!(statement, command); + assert_eq!(statement, Ok(command)); } } #[test] fn statements_with_quotes() { let command = "echo \"This ;'is a test\"; echo 'This ;\" is also a test'"; - let results = StatementSplitter::new(command).collect::<Vec<&str>>(); + let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>(); assert_eq!(results.len(), 2); - assert_eq!(results[0], "echo \"This ;'is a test\""); - assert_eq!(results[1], "echo 'This ;\" is also a test'"); + assert_eq!(results[0], Ok("echo \"This ;'is a test\"")); + assert_eq!(results[1], Ok("echo 'This ;\" is also a test'")); } #[test] fn statements_with_comments() { let command = "echo $(echo one # two); echo three # four"; - let results = StatementSplitter::new(command).collect::<Vec<&str>>(); + let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>(); assert_eq!(results.len(), 2); - assert_eq!(results[0], "echo $(echo one # two)"); - assert_eq!(results[1], "echo three"); + assert_eq!(results[0], Ok("echo $(echo one # two)")); + assert_eq!(results[1], Ok("echo three")); } #[test] fn statements_with_process_recursion() { let command = "echo $(echo one $(echo two) three)"; - let results = StatementSplitter::new(command).collect::<Vec<&str>>(); + let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>(); assert_eq!(results.len(), 1); - assert_eq!(results[0], command); + assert_eq!(results[0], Ok(command)); let command = "echo $(echo $(echo one; echo two); echo two)"; - let results = StatementSplitter::new(command).collect::<Vec<&str>>(); + let results = StatementSplitter::new(command).collect::<Vec<Result<&str, StatementError>>>(); assert_eq!(results.len(), 1); - assert_eq!(results[0], command); + assert_eq!(results[0], Ok(command)); } diff --git a/src/shell/flow.rs b/src/shell/flow.rs index 80a1abf5..eabd2adb 100644 --- a/src/shell/flow.rs +++ b/src/shell/flow.rs @@ -4,7 +4,7 @@ use status::*; use super::Shell; use flow_control::{ElseIf, Function, Statement, collect_loops, collect_if}; -use parser::{ForExpression, StatementSplitter}; +use parser::{ForExpression, StatementSplitter, StatementError}; use parser::peg::{parse, Pipeline}; pub trait FlowLogic { @@ -20,7 +20,25 @@ pub trait FlowLogic { impl<'a> FlowLogic for Shell<'a> { fn on_command(&mut self, command_string: &str) { - let mut iterator = StatementSplitter::new(command_string).map(parse); + let mut iterator = StatementSplitter::new(command_string).filter_map(|statement| { + match statement { + Ok(statement) => Some(statement), + Err(err) => { + let stderr = io::stderr(); + match err { + StatementError::InvalidCharacter(character, position) => { + let _ = writeln!(stderr.lock(), + "ion: syntax error: '{}' at position {} is out of place", + character, position); + }, + StatementError::UnterminatedSubshell => { + let _ = writeln!(stderr.lock(), "ion: syntax error: unterminated subshell"); + } + } + None + } + } + }).map(parse); // If the value is set to `0`, this means that we don't need to append to an existing // partial statement block in memory, but can read and execute new statements. diff --git a/src/shell/mod.rs b/src/shell/mod.rs index 9b76840b..f21e603a 100644 --- a/src/shell/mod.rs +++ b/src/shell/mod.rs @@ -25,7 +25,7 @@ use variables::Variables; use status::*; use pipe::execute_pipeline; use parser::shell_expand::ExpandErr; -use parser::{expand_string, StatementSplitter}; +use parser::{expand_string, StatementError, StatementSplitter}; use parser::peg::{parse, Pipeline}; /// This struct will contain all of the data structures related to this @@ -154,7 +154,8 @@ impl<'a> Shell<'a> { } else { match File::open(&arg) { Ok(mut file) => { - let mut command_list = String::new(); + let capacity = file.metadata().ok().map_or(0, |x| x.len()); + let mut command_list = String::with_capacity(capacity as usize); match file.read_to_string(&mut command_list) { Ok(_) => { for command in command_list.lines() { @@ -294,7 +295,25 @@ impl<'a> Shell<'a> { alias += argument; } - for statement in StatementSplitter::new(&alias).map(parse) { + for statement in StatementSplitter::new(&alias).filter_map(|statement| { + match statement { + Ok(statement) => Some(statement), + Err(err) => { + let stderr = io::stderr(); + match err { + StatementError::InvalidCharacter(character, position) => { + let _ = writeln!(stderr.lock(), + "ion: syntax error: '{}' at position {} is out of place", + character, position); + }, + StatementError::UnterminatedSubshell => { + let _ = writeln!(stderr.lock(), "ion: syntax error: unterminated subshell"); + } + } + None + } + } + }).map(parse) { match statement { Statement::Pipelines(mut pipelines) => for mut pipeline in pipelines.drain(..) { exit_status = self.run_pipeline(&mut pipeline, true); @@ -316,7 +335,7 @@ impl<'a> Shell<'a> { // Run the 'main' of the command and set exit_status Some((*command.main)(pipeline.jobs[0].args.as_slice(), self)) // Branch else if -> input == shell function and set the exit_status - } else if let Some(function) = self.functions.get(pipeline.jobs[0].command.as_str()).cloned() { + } else if let Some(function) = self.functions.get(pipeline.jobs[0].command.as_str()).cloned() { if pipeline.jobs[0].args.len() - 1 == function.args.len() { let mut variables_backup: HashMap<&str, Option<String>> = HashMap::new(); for (name, value) in function.args.iter().zip(pipeline.jobs[0].args.iter().skip(1)) { -- GitLab