Verified Commit 1acc2a1a authored by jD91mZM2's avatar jD91mZM2
Browse files

Initial regex.h implementation

parent dfa3845c
......@@ -148,6 +148,11 @@ name = "num-traits"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "posix-regex"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "0.2.3"
......@@ -227,6 +232,7 @@ dependencies = [
"compiler_builtins 0.1.0 (git+https://github.com/rust-lang-nursery/compiler-builtins.git)",
"core_io 0.1.20180619",
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ralloc 1.0.0",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.40 (git+https://gitlab.redox-os.org/redox-os/syscall.git?branch=relibc)",
......@@ -464,6 +470,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
"checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "58b31ca4f5022c6c0a22206d63c177be2f418355db5a713db22bd901c6ac0db3"
"checksum proc-macro2 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cd07deb3c6d1d9ff827999c7f9b04cdfd66b1b17ae508e14fe47b620f2282ae0"
"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
......
......@@ -18,6 +18,7 @@ cc = "1.0.17"
cbitset = "0.1.0"
core_io = { path = "core_io", features = ["collections"] }
lazy_static = { version = "*", features = ["nightly", "spin_no_std"] }
posix-regex = { version = "0.1", features = ["no_std"] }
rand = { version = "0.5.2", default-features = false }
va_list = { path = "va_list", features = ["no_std"] }
......
......@@ -53,6 +53,7 @@ libc: $(BUILD)/release/libc.a $(BUILD)/release/crt0.o $(BUILD)/release/crti.o $(
libm: $(BUILD)/openlibm/libopenlibm.a
sysroot: all
rm -rf $@
rm -rf $@.partial
mkdir -p $@.partial
make install DESTDIR=$@.partial
......
......@@ -20,6 +20,7 @@ typedef long clock_t;
typedef int clockid_t;
typedef void* timer_t;
typedef unsigned long int blkcnt_t;
typedef size_t regoff_t;
typedef unsigned char u_char, uchar;
typedef unsigned short u_short, ushort;
......
......@@ -14,6 +14,7 @@ pub mod netdb;
pub mod netinet_in;
//pub mod pthread;
pub mod pwd;
pub mod regex;
pub mod semaphore;
pub mod setjmp;
pub mod sgtty;
......
sys_includes = ["sys/types.h"]
include_guard = "_TEMPLATE_H"
language = "C"
style = "Type"
[enum]
prefix_with_name = true
//! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::{mem, slice, ptr};
use header::string::strlen;
use platform::types::*;
use posix_regex::{PosixRegexBuilder, PosixRegex};
use posix_regex::compile::{Error as CompileError, Token, Range};
/// Compiled regular expression handle shared with C callers.
///
/// `regcomp` fills this in with the raw parts of the vector of compiled
/// branches, and `regfree` reassembles that vector from the same three fields
/// in order to drop it.
#[repr(C)]
pub struct regex_t {
// Can't be a normal Vec<T> because then the struct size won't be known
// from C.
// Data pointer of the leaked vector of compiled branches.
ptr: *mut c_void,
// Number of branches stored behind `ptr`.
length: size_t,
// Capacity of the leaked vector, needed to rebuild it in `regfree`.
capacity: size_t,
// The `cflags` value that was passed to `regcomp`.
cflags: c_int,
// Number of groups in the pattern, as counted by `count_groups`.
re_nsub: size_t
}
/// One entry of the `pmatch` array written by `regexec`.
///
/// Both fields are set to `!0` (all bits set) for entries that `regexec` has
/// no offsets for.
#[repr(C)]
pub struct regmatch_t {
// Start offset reported by the matcher.
rm_so: regoff_t,
// End offset reported by the matcher.
rm_eo: regoff_t
}
// Flag bits. `regexec` ORs the flags remembered by `regcomp` with its own
// `eflags`, so each value below is a distinct bit.
// REG_EXTENDED is not implemented: `regcomp` and `regexec` answer REG_ENOSYS.
pub const REG_EXTENDED: c_int = 1;
pub const REG_ICASE: c_int = 2;
pub const REG_NOSUB: c_int = 4;
pub const REG_NEWLINE: c_int = 8;
pub const REG_NOTBOL: c_int = 16;
pub const REG_NOTEOL: c_int = 32;
// Non-zero return codes of `regcomp`/`regexec`; `regerror` maps each one to
// a human readable message. These are plain numbers, not bits.
pub const REG_NOMATCH: c_int = 1;
pub const REG_BADPAT: c_int = 2;
pub const REG_ECOLLATE: c_int = 3;
pub const REG_ECTYPE: c_int = 4;
pub const REG_EESCAPE: c_int = 5;
pub const REG_ESUBREG: c_int = 6;
pub const REG_EBRACK: c_int = 7;
pub const REG_ENOSYS: c_int = 8;
pub const REG_EPAREN: c_int = 9;
pub const REG_EBRACE: c_int = 10;
pub const REG_BADBR: c_int = 11;
pub const REG_ERANGE: c_int = 12;
pub const REG_ESPACE: c_int = 13;
pub const REG_BADRPT: c_int = 14;
/// Counts every `Token::Group` found in `branches`, including groups nested
/// inside other groups.
fn count_groups(branches: &[Vec<(Token, Range)>]) -> usize {
    branches
        .iter()
        .flat_map(|branch| branch.iter())
        .map(|(token, _)| match token {
            // A group counts for itself plus everything nested inside it.
            Token::Group(nested) => 1 + count_groups(nested),
            _ => 0,
        })
        .sum()
}
/// Compiles the NUL-terminated pattern `pat` and stores the result in `out`.
///
/// Returns 0 on success. Returns `REG_ENOSYS` when `REG_EXTENDED` is
/// requested, otherwise the `REG_*` code that corresponds to the compile
/// error. `out` is only written on success.
#[no_mangle]
pub extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
    // Extended syntax is not supported by this implementation.
    if cflags & REG_EXTENDED != 0 {
        return REG_ENOSYS;
    }

    let pattern = unsafe { slice::from_raw_parts(pat as *const u8, strlen(pat)) };
    let compiled = PosixRegexBuilder::new(pattern)
        .with_default_classes()
        .compile_tokens();

    let mut branches = match compiled {
        Ok(branches) => branches,
        // Translate the compiler's error into the matching regex.h code.
        Err(err) => return match err {
            CompileError::EmptyRepetition
            | CompileError::IntegerOverflow
            | CompileError::IllegalRange => REG_BADBR,
            CompileError::UnclosedRepetition => REG_EBRACE,
            CompileError::LeadingRepetition => REG_BADRPT,
            CompileError::UnknownCollation => REG_ECOLLATE,
            CompileError::UnknownClass(_) => REG_ECTYPE,
            _ => REG_BADPAT,
        },
    };

    let handle = regex_t {
        re_nsub: count_groups(&branches),
        ptr: branches.as_mut_ptr() as *mut c_void,
        length: branches.len(),
        capacity: branches.capacity(),
        cflags,
    };
    unsafe {
        *out = handle;
    }
    // Ownership of the allocation now lives in `out`; `regfree` releases it.
    mem::forget(branches);
    0
}
/// Releases the branches stored in `regex` by `regcomp`.
///
/// # Safety
///
/// `regex` must point to a `regex_t` whose `ptr`, `length` and `capacity`
/// still describe the vector leaked by `regcomp`.
#[no_mangle]
pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
    let handle = &*regex;
    let branches = handle.ptr as *mut Vec<(Token, Range)>;
    // Rebuild the vector from its raw parts so that dropping it frees it.
    drop(Vec::from_raw_parts(branches, handle.length, handle.capacity));
}
/// Matches the NUL-terminated `input` against a regex compiled by `regcomp`.
///
/// The flags stored by `regcomp` and `eflags` are merged, and `REG_ICASE`,
/// `REG_NEWLINE`, `REG_NOTBOL` and `REG_NOTEOL` are forwarded to the matcher.
///
/// Returns 0 when a match is found, `REG_NOMATCH` when there is none, and
/// `REG_ENOSYS` when `REG_EXTENDED` is set in `eflags`.
///
/// On a match, up to `nmatch` entries of `pmatch` receive the offsets
/// reported by the matcher, and the remaining entries are set to all-ones.
/// Nothing is written when `REG_NOSUB` is set in the merged flags, when
/// `pmatch` is null, or when `nmatch` is 0.
#[no_mangle]
pub extern "C" fn regexec(regex: *const regex_t, input: *const c_char,
                          nmatch: size_t, pmatch: *mut regmatch_t, eflags: c_int) -> c_int {
    if eflags & REG_EXTENDED == REG_EXTENDED {
        return REG_ENOSYS;
    }

    let regex = unsafe { &(*regex) };

    // Allow specifying a compiler argument to the executor and vice versa
    // because why not?
    let flags = regex.cflags | eflags;

    let input = unsafe { slice::from_raw_parts(input as *const u8, strlen(input)) };
    let branches = unsafe { slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length) };

    // Only the first match is needed.
    let matches = PosixRegex::new(Cow::Borrowed(&branches))
        .case_insensitive(flags & REG_ICASE == REG_ICASE)
        .newline(flags & REG_NEWLINE == REG_NEWLINE)
        .no_start(flags & REG_NOTBOL == REG_NOTBOL)
        .no_end(flags & REG_NOTEOL == REG_NOTEOL)
        .matches(input, Some(1));

    if matches.is_empty() {
        return REG_NOMATCH;
    }

    // REG_NOSUB is checked on the merged flags so that it is honoured when it
    // was passed to `regcomp`, which is where callers normally specify it.
    if flags & REG_NOSUB != REG_NOSUB
        && !pmatch.is_null()
        && nmatch > 0 {
        let first = &matches[0];
        let len = first.len().min(nmatch as usize);

        for i in 0..len {
            let (start, end) = first[i];
            unsafe {
                *pmatch.offset(i as isize) = regmatch_t {
                    rm_so: start,
                    rm_eo: end
                };
            }
        }
        // Mark the entries the matcher had nothing for as unused.
        for i in len as isize..nmatch as isize {
            unsafe {
                *pmatch.offset(i) = regmatch_t {
                    rm_so: !0,
                    rm_eo: !0
                };
            }
        }
    }

    0
}
/// Writes the message for the error `code` into `out` as a C string.
///
/// At most `max` bytes are written, including the terminating NUL. When the
/// message does not fit it is truncated, and the last written byte is always
/// NUL. Nothing is written when `out` is null or `max` is not positive.
/// `_regex` is ignored.
#[no_mangle]
pub extern "C" fn regerror(code: c_int, _regex: *const regex_t, out: *mut c_char, max: c_int) {
    let string = match code {
        0 => "No error\0",
        REG_NOMATCH => "No match\0",
        REG_BADPAT => "Invalid regexp\0",
        REG_ECOLLATE => "Unknown collating element\0",
        REG_ECTYPE => "Unknown character class name\0",
        REG_EESCAPE => "Trailing backslash\0",
        REG_ESUBREG => "Invalid back reference\0",
        REG_EBRACK => "Missing ']'\0",
        REG_ENOSYS => "Unsupported operation\0",
        REG_EPAREN => "Missing ')'\0",
        REG_EBRACE => "Missing '}'\0",
        REG_BADBR => "Invalid contents of {}\0",
        REG_ERANGE => "Invalid character range\0",
        REG_ESPACE => "Out of memory\0",
        REG_BADRPT => "Repetition not preceded by valid expression\0",
        _ => "Unknown error\0"
    };

    // A negative `max` would become a huge length once cast to usize, and a
    // zero-sized or missing buffer has no room for even the terminator.
    if out.is_null() || max <= 0 {
        return;
    }

    let len = string.len().min(max as usize);
    unsafe {
        ptr::copy_nonoverlapping(string.as_ptr(), out as *mut u8, len);
        // `len` is at least 1 here. If the message was cut short this replaces
        // its last copied byte with the terminator; otherwise that byte
        // already is the NUL.
        *out.offset(len as isize - 1) = 0;
    }
}
......@@ -21,6 +21,7 @@ extern crate cbitset;
extern crate core_io;
#[macro_use]
extern crate lazy_static;
extern crate posix_regex;
extern crate rand;
extern crate va_list;
......
......@@ -46,6 +46,7 @@ pub type wchar_t = i32;
pub type wint_t = u32;
pub type wctype_t = i64;
pub type regoff_t = size_t;
pub type off_t = c_long;
pub type mode_t = c_int;
pub type time_t = c_long;
......
......@@ -11,6 +11,7 @@ EXPECT_BINS=\
locale \
math \
netdb \
regex \
select \
setjmp \
signal \
......
Matching group: 25 - 36
Matching group: 31 - 36
Matching group: -1 - -1
#include <regex.h>
#include <stdio.h>
/*
 * Compiles a basic regular expression with one group, runs it
 * case-insensitively against a sample sentence and prints the offsets of
 * the three match slots.
 */
int main() {
    regex_t regex;
    char error_buf[256];

    int error = regcomp(&regex, "h.llo \\(w.rld\\)", REG_ICASE);
    if (error) {
        regerror(error, &regex, error_buf, 255);
        error_buf[255] = 0;
        printf("regcomp error: %d = %s\n", error, error_buf);
        return -1;
    }

    regmatch_t matches[3] = { 0 };

    error = regexec(&regex, "Hey, how are you? Hello? Hallo Wurld??", 3, matches, 0);
    if (error) {
        /* Ask for the message while the regex is still alive, then free it. */
        regerror(error, &regex, error_buf, 255);
        error_buf[255] = 0;
        printf("regexec error: %d = %s\n", error, error_buf);
        regfree(&regex);
        return -1;
    }
    regfree(&regex);

    for (int group = 0; group < 3; group += 1) {
        /* regoff_t is not an int here, so convert explicitly for printf. */
        printf("Matching group: %ld - %ld\n",
               (long) matches[group].rm_so, (long) matches[group].rm_eo);
    }
    return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment