diff --git a/examples/herestring.ion b/examples/herestring.ion index 55b988e2d7d96cf598082aa64d8993d24cf8d173..fe327822de4ddc499334daffe9d712a79cb1ff09 100644 --- a/examples/herestring.ion +++ b/examples/herestring.ion @@ -12,3 +12,9 @@ end find "bar" "@output" find "zar" "@output" + +tr '[a-z]' '[A-Z]' << EOF +one two +three four +five six +EOF diff --git a/examples/herestring.out b/examples/herestring.out index 46bae8fcf850d4c61e955279754ef3105e932720..e43349bccffab183736112be6e32141459664f8a 100644 --- a/examples/herestring.out +++ b/examples/herestring.out @@ -1,3 +1,6 @@ FOO true false +ONE TWO +THREE FOUR +FIVE SIX diff --git a/src/parser/peg.rs b/src/parser/peg.rs index ce1db23927a4a9c66183cfea63a7dd313c1e73b5..405e0736d5b944198809eb9dea904367d62560ee 100644 --- a/src/parser/peg.rs +++ b/src/parser/peg.rs @@ -23,7 +23,8 @@ pub struct Redirection { pub enum Input { /// A file; the contents of said file will be written to the `stdin` of a process File(String), - /// A string literal that is written to the `stdin` of a process + /// A string literal that is written to the `stdin` of a process. + /// A heredoc (`<<`) is stored in this variant too: its body lines, without the EOF phrase. HereString(String), } diff --git a/src/parser/pipelines.rs b/src/parser/pipelines.rs index fc2fe2a18f89c05ec2c52a4e8cd0f2de80166ed2..bc59a02372b2e6a5e5a8721618c3eeccc721c3c5 100644 --- a/src/parser/pipelines.rs +++ b/src/parser/pipelines.rs @@ -1,11 +1,5 @@ #![allow(eq_op)] // Required as a macro sets this clippy warning off. 
-// TODO: -// - Rewrite this module like the shell_expand::words module -// - Implement Herestrings -// - Implement Heredocs -// - Fix the cyclomatic complexity issue - use std::collections::HashSet; use std::iter::Peekable; @@ -270,15 +264,33 @@ impl<'a> Collector<'a> { }, b'<' => { bytes.next(); - if Some(b'<') == self.peek(i + 1) && Some(b'<') == self.peek(i + 2) { - // If the next two characters are arrows, then interpret - // the next argument as a herestring - bytes.next(); - bytes.next(); - if let Some(cmd) = self.arg(&mut bytes)? { - input = Some(Input::HereString(cmd.into())); + if Some(b'<') == self.peek(i + 1) { + if Some(b'<') == self.peek(i + 2) { + // If the next two characters are arrows, then interpret + // the next argument as a herestring + bytes.next(); + bytes.next(); + if let Some(cmd) = self.arg(&mut bytes)? { + input = Some(Input::HereString(cmd.into())); + } else { + return Err("expected string argument after '<<<'"); + } } else { - return Err("expected string argument after '<<<'"); + // Otherwise, what we have is not a herestring, but a heredoc. + bytes.next(); + // Collect the rest of the byte iterator into a string. Its first line + // (the one carrying the EOF phrase) and its last line (the terminator) + // are dropped below; the lines in between are the heredoc body. + let heredoc = { + let mut buffer = Vec::new(); + while let Some((_, byte)) = bytes.next() { + buffer.push(byte); + } + unsafe { String::from_utf8_unchecked(buffer) } + }; + let heredoc = heredoc.lines().collect::<Vec<&str>>(); + // Keep the lines in between as the body (this slice panics if fewer than two lines remain). + input = Some(Input::HereString(heredoc[1..heredoc.len()-1].join("\n"))); } } else if let Some(file) = self.arg(&mut bytes)? 
{ // Otherwise interpret it as stdin redirection @@ -740,6 +752,17 @@ mod tests { assert_eq!(Statement::Pipeline(expected), parse(input)); } + #[test] + fn heredoc() { + let input = "calc << EOF\n1 + 2\n3 + 4\nEOF"; + let expected = Pipeline { + jobs: vec![Job::new(array!["calc"], JobKind::Last)], + stdin: Some(Input::HereString("1 + 2\n3 + 4".into())), + stdout: None, + }; + assert_eq!(Statement::Pipeline(expected), parse(input)); + } + #[test] fn piped_herestring() { let input = "cat | tr 'o' 'x' <<< $VAR > out.log"; diff --git a/src/parser/quotes.rs b/src/parser/quotes.rs index 9c777a73fc42c8efa6810bc729d59d3508f9672c..193d04e927c381a021f6bd4bb92a9685fcefa8f2 100644 --- a/src/parser/quotes.rs +++ b/src/parser/quotes.rs @@ -7,54 +7,99 @@ bitflags! { } } - pub struct QuoteTerminator { buffer: String, + eof: Option<String>, + eof_buffer: String, read: usize, flags: Flags, } impl QuoteTerminator { pub fn new(input: String) -> QuoteTerminator { - QuoteTerminator { buffer: input, read: 0, flags: Flags::empty() } + QuoteTerminator { buffer: input, eof: None, eof_buffer: String::new(), read: 0, flags: Flags::empty() } } pub fn append(&mut self, input: String) { - self.buffer.push_str(if self.flags.contains(TRIM) { input.trim() } else { &input }); + if self.eof.is_none() { + self.buffer.push_str(if self.flags.contains(TRIM) { input.trim() } else { &input }); + } else { + self.eof_buffer.push_str(&input); + } } pub fn check_termination(&mut self) -> bool { - for character in self.buffer.bytes().skip(self.read) { - self.read += 1; - match character { - _ if self.flags.contains(BACKSL) => self.flags ^= BACKSL, - b'\\' => self.flags ^= BACKSL, - b'\'' if !self.flags.intersects(DQUOTE) => self.flags ^= SQUOTE, - b'"' if !self.flags.intersects(SQUOTE) => self.flags ^= DQUOTE, - _ => (), + let mut eof_line = None; + let eof = self.eof.clone(); + let status = if let Some(ref eof) = eof { + let line = &self.eof_buffer; + eof_line = Some([&line, "\n"].concat()); + line.trim() == 
eof + } else { + { + let mut eof_found = false; + { + let mut bytes = self.buffer.bytes().skip(self.read); + while let Some(character) = bytes.next() { + self.read += 1; + match character { + _ if self.flags.contains(BACKSL) => self.flags ^= BACKSL, + b'\\' => self.flags ^= BACKSL, + b'\'' if !self.flags.intersects(DQUOTE) => self.flags ^= SQUOTE, + b'"' if !self.flags.intersects(SQUOTE) => self.flags ^= DQUOTE, + b'<' if !self.flags.contains(SQUOTE | DQUOTE) => { + let as_bytes = self.buffer.as_bytes(); + if Some(&b'<') == as_bytes.get(self.read) { + self.read += 1; + if Some(&b'<') != as_bytes.get(self.read) { + use std::str; + let eof_phrase = unsafe { str::from_utf8_unchecked(&as_bytes[self.read..]) }; + self.eof = Some(eof_phrase.trim().to_owned()); + eof_found = true; + break + } + } + } + _ => (), + } + } + } + if eof_found { + self.buffer.push('\n'); + return false + } } - } - if self.flags.intersects(SQUOTE | DQUOTE) { - self.read += 1; - self.buffer.push('\n'); - false - } else { - match self.buffer.bytes().last() { - Some(b'\\') => { - let _ = self.buffer.pop(); - self.read -= 1; - self.flags |= TRIM; - false - }, - Some(b'|') | Some(b'&') => { - // self.read -= 1; - // self.flags |= TRIM; - false + if self.flags.intersects(SQUOTE | DQUOTE) { + self.read += 1; + self.buffer.push('\n'); + false + } else { + match self.buffer.bytes().last() { + Some(b'\\') => { + let _ = self.buffer.pop(); + self.read -= 1; + self.flags |= TRIM; + false + }, + Some(b'|') | Some(b'&') => { + false + } + _ => true } - _ => true + } + }; + + if let Some(line) = eof_line { + self.buffer.push_str(&line); + } + if self.eof.is_some() { + self.eof_buffer.clear(); + if status { + self.eof = None; } } + status } pub fn consume(self) -> String { self.buffer }