From a2f17e7660e1247f004c9227fff39673d1845512 Mon Sep 17 00:00:00 2001 From: Xavier L'Heureux <xavier.lheureux@icloud.com> Date: Fri, 5 Apr 2019 14:21:42 -0400 Subject: [PATCH] Add unicode support for function and variable names --- examples/fn.out | 2 + examples/run_examples.sh | 2 +- examples/script_exec.out | 102 +++++++++++++++++++++++++++ examples/unicode.ion | 10 +++ examples/unicode.out | 7 ++ manual/src/expansions/01-variable.md | 8 +-- manual/src/functions.md | 4 +- src/lib/parser/statement/parse.rs | 6 +- src/lib/shell/assignments.rs | 4 +- 9 files changed, 133 insertions(+), 12 deletions(-) create mode 100644 examples/unicode.ion create mode 100644 examples/unicode.out diff --git a/examples/fn.out b/examples/fn.out index 6144e859..d593d50c 100644 --- a/examples/fn.out +++ b/examples/fn.out @@ -15,3 +15,5 @@ goodbye 9 16 25 +ion: function argument has invalid type: expected int, found value '$num' +ion: function argument has invalid type: expected int, found value '$num' diff --git a/examples/run_examples.sh b/examples/run_examples.sh index 311e50e8..627cb60c 100755 --- a/examples/run_examples.sh +++ b/examples/run_examples.sh @@ -26,7 +26,7 @@ function test { EXPECTED_OUTPUT_FILE=$(echo $1 | sed 's/\..\+/\.out/') # Run script and redirect stdout into tmp file - $PROJECT_DIR/target/debug/ion "${@:2}" > $EXAMPLES_DIR/tmp.out 2> /dev/null + $PROJECT_DIR/target/debug/ion "${@:2}" > $EXAMPLES_DIR/tmp.out 2>&1 # Compare real and expected output diff "$EXAMPLES_DIR"/tmp.out "$EXPECTED_OUTPUT_FILE" > "$EXAMPLES_DIR"/diff_tmp diff --git a/examples/script_exec.out b/examples/script_exec.out index 61461fe2..d7ac537a 100644 --- a/examples/script_exec.out +++ b/examples/script_exec.out @@ -7,6 +7,15 @@ firstline secondline 3 2 1 a 3 2 1 +ion: command not found: one +ion: command not found: one +ion: command not found: o +ion: command not found: 111 +ion: command not found: o +ion: command not found: firstline +ion: command not found: 3 +ion: command not found: a +ion: command not found: 3 1 2 3 @@ -61,7 +70,62 @@ a b c d e 😉 😉 1 2 3 4 5 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 4 +ion: command not found: 5 +ion: command not found: 6 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 4 +ion: command not found: 5 +ion: command not found: 6 +ion: command not found: a +ion: command not found: b +ion: command not found: c +ion: command not found: d +ion: command not found: e +ion: command not found: a +ion: command not found: b +ion: command not found: c +ion: command not found: d +ion: command not found: e +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 4 +ion: command not found: 5 +ion: command not found: 6 +ion: command not found: a +ion: command not found: b +ion: command not found: c +ion: command not found: d +ion: command not found: e +ion: command not found: 1 +ion: command not found: a +ion: command not found: 1 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 1 +ion: command not found: 1 +ion: command not found: 1 +ion: command not found: 4 +ion: command not found: 1 +ion: command not found: 😉 +ion: command not found: 😉 +ion: command not found: 😉 +ion: command not found: 1 pass +ion: command not found: pass 1A1 1A2 1B1 1B2 0abc2def5g 0abc2def5h 0abc2def5i 0abc2def6g 0abc2def6h 0abc2def6i 0abc2def7g 0abc2def7h 0abc2def7i 0abc3def5g 0abc3def5h 0abc3def5i 0abc3def6g 0abc3def6h 0abc3def6i 0abc3def7g 0abc3def7h 0abc3def7i 0abc4def5g 0abc4def5h 0abc4def5i 0abc4def6g 0abc4def6h 0abc4def6i 0abc4def7g 0abc4def7h 0abc4def7i 1abc2def5g 1abc2def5h 1abc2def5i 1abc2def6g 1abc2def6h 1abc2def6i 1abc2def7g 1abc2def7h 1abc2def7i 1abc3def5g 1abc3def5h 1abc3def5i 1abc3def6g 1abc3def6h 1abc3def6i 1abc3def7g 1abc3def7h 1abc3def7i 1abc4def5g 1abc4def5h 1abc4def5i 1abc4def6g 1abc4def6h 1abc4def6i 1abc4def7g 1abc4def7h 1abc4def7i -1 0 1 @@ -88,6 +152,32 @@ A C E 0 -2 -4 e c E C +ion: command not found: 1A1 +ion: command not found: 0abc2def5g +ion: command not found: -1 +ion: command not found: 2 +ion: command not found: a +ion: command not found: d +ion: command not found: A +ion: command not found: D +ion: command not found: -1 +ion: command not found: 2 +ion: command not found: a +ion: command not found: d +ion: command not found: A +ion: command not found: D +ion: command not found: 0 +ion: command not found: a +ion: command not found: A +ion: command not found: 0 +ion: command not found: e +ion: command not found: E +ion: command not found: 0 +ion: command not found: a +ion: command not found: A +ion: command not found: 0 +ion: command not found: e +ion: command not found: E 1 2 3 @@ -98,7 +188,19 @@ E C 3 4 5 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 4 +ion: command not found: 5 +ion: command not found: 1 +ion: command not found: 2 +ion: command not found: 3 +ion: command not found: 4 +ion: command not found: 5 true false foo bar +ion: command not found: foo +ion: command not found: bar diff --git a/examples/unicode.ion b/examples/unicode.ion new file mode 100644 index 00000000..1ccffd21 --- /dev/null +++ b/examples/unicode.ion @@ -0,0 +1,10 @@ +fn Җ Ҕ:int + echo $Ҕ +end + +echo Ҥ +Җ 1 +let Ҳ = ӌ +echo $Ҳ +let 1Ҳ = ӌ +echo $1Ҳ diff --git a/examples/unicode.out b/examples/unicode.out new file mode 100644 index 00000000..78310995 --- /dev/null +++ b/examples/unicode.out @@ -0,0 +1,7 @@ +Ҥ +1 +ӌ +ion: assignment error: invalid variable name +Variable names may only be (unicode) alphanumeric or `_` +The first character must be alphabetic + diff --git a/manual/src/expansions/01-variable.md b/manual/src/expansions/01-variable.md index 055a83fe..779d38b4 100644 --- a/manual/src/expansions/01-variable.md +++ b/manual/src/expansions/01-variable.md @@ -9,8 +9,8 @@ handles larger numbers. ## String Variables Like POSIX shells, the **$** sigil denotes that the following expression will be a string -expansion. If the character that follows is an accepted ASCII character, all characters that -follow will be collected until either a non-accepted ASCII character is found, or all characters +expansion. If the character that follows is an accepted Unicode character, all characters that +follow will be collected until either a non-accepted Unicode character is found, or all characters have been read. Then the characters that were collected will be used as the name of the string variable to substitute with. @@ -23,7 +23,7 @@ $ echo $string:$string ``` **NOTE:** -- Accepted characters are characters ranging from **A-Z**, **a-z**, **0-9**, and **_**. +- Accepted characters are **unicode** alphanumeric characters and **_**. - If not double quoted, newlines will be replaced with spaces. ## Array Variables @@ -46,7 +46,7 @@ double quotes is therefore equivalent to folding the elements into a single stri ## Braced Variables -Braces can also be used when you need to integrate a variable expansion along accepted ASCII +Braces can also be used when you need to integrate a variable expansion along accepted Unicode characters. ```sh diff --git a/manual/src/functions.md b/manual/src/functions.md index 75fb6e30..69d31b26 100644 --- a/manual/src/functions.md +++ b/manual/src/functions.md @@ -2,7 +2,7 @@ Functions help scripts to reduce the amount of code duplication and increase readability. Ion supports the creation of functions with a similar syntax to other languages. -The basic syntax of functions is as follos: +The basic syntax of functions is as follows: ```sh fn square @@ -53,7 +53,7 @@ You can use any of the [supported types](ch04-00-variables.md#Supported Types). As another example: ``` -fn hello name age:int hobbies[] +fn hello name age:int hobbies:[str] echo $name ($age) has the following hobbies: for hobby in @hobbies echo " $hobby" diff --git a/src/lib/parser/statement/parse.rs b/src/lib/parser/statement/parse.rs index 221ac813..e399820b 100644 --- a/src/lib/parser/statement/parse.rs +++ b/src/lib/parser/statement/parse.rs @@ -27,9 +27,9 @@ where } pub fn is_valid_name(name: &str) -> bool { - let mut bytes = name.bytes(); - bytes.next().map_or(false, |b| b.is_ascii_alphabetic()) - && bytes.all(|b| b.is_ascii_alphanumeric() || b == b'_') + let mut chars = name.chars(); + chars.next().map_or(false, |b| b.is_alphabetic()) + && chars.all(|b| b.is_alphanumeric() || b == '_') } pub(crate) fn parse(code: &str) -> Statement { diff --git a/src/lib/shell/assignments.rs b/src/lib/shell/assignments.rs index 8c466140..3bff5819 100644 --- a/src/lib/shell/assignments.rs +++ b/src/lib/shell/assignments.rs @@ -130,8 +130,8 @@ impl VariableStore for Shell { } if !is_valid_name(key.name) { - return Err("invalid variable name\nVariable names may only have A-Z, a-z, 0-9 \ - and _\nThe first character cannot be a digit" + return Err("invalid variable name\nVariable names may only be (unicode) \ + alphanumeric or `_`\nThe first character must be alphabetic" .to_string()); } -- GitLab