Skip to content
Snippets Groups Projects
Verified Commit 1acc2a1a authored by jD91mZM2's avatar jD91mZM2
Browse files

Initial regex.h implementation

parent dfa3845c
No related branches found
No related tags found
No related merge requests found
......@@ -148,6 +148,11 @@ name = "num-traits"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "posix-regex"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "0.2.3"
......@@ -227,6 +232,7 @@ dependencies = [
"compiler_builtins 0.1.0 (git+https://github.com/rust-lang-nursery/compiler-builtins.git)",
"core_io 0.1.20180619",
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ralloc 1.0.0",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.40 (git+https://gitlab.redox-os.org/redox-os/syscall.git?branch=relibc)",
......@@ -464,6 +470,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b"
"checksum log 0.4.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fcce5fa49cc693c312001daf1d13411c4a5283796bac1084299ea3e567113f"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum posix-regex 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "58b31ca4f5022c6c0a22206d63c177be2f418355db5a713db22bd901c6ac0db3"
"checksum proc-macro2 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "cd07deb3c6d1d9ff827999c7f9b04cdfd66b1b17ae508e14fe47b620f2282ae0"
"checksum quote 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e920b65c65f10b2ae65c831a81a073a89edd28c7cce89475bff467ab4167a"
"checksum rand 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "8356f47b32624fef5b3301c1be97e5944ecdd595409cc5da11d05f211db6cfbd"
......
......@@ -18,6 +18,7 @@ cc = "1.0.17"
cbitset = "0.1.0"
core_io = { path = "core_io", features = ["collections"] }
lazy_static = { version = "*", features = ["nightly", "spin_no_std"] }
posix-regex = { version = "0.1", features = ["no_std"] }
rand = { version = "0.5.2", default-features = false }
va_list = { path = "va_list", features = ["no_std"] }
......
......@@ -53,6 +53,7 @@ libc: $(BUILD)/release/libc.a $(BUILD)/release/crt0.o $(BUILD)/release/crti.o $(
libm: $(BUILD)/openlibm/libopenlibm.a
sysroot: all
rm -rf $@
rm -rf $@.partial
mkdir -p $@.partial
make install DESTDIR=$@.partial
......
......@@ -20,6 +20,7 @@ typedef long clock_t;
typedef int clockid_t;
typedef void* timer_t;
typedef unsigned long int blkcnt_t;
typedef size_t regoff_t;
typedef unsigned char u_char, uchar;
typedef unsigned short u_short, ushort;
......
......@@ -14,6 +14,7 @@ pub mod netdb;
pub mod netinet_in;
//pub mod pthread;
pub mod pwd;
pub mod regex;
pub mod semaphore;
pub mod setjmp;
pub mod sgtty;
......
sys_includes = ["sys/types.h"]
include_guard = "_TEMPLATE_H"
language = "C"
style = "Type"
[enum]
prefix_with_name = true
//! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
use alloc::borrow::Cow;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::{mem, slice, ptr};
use header::string::strlen;
use platform::types::*;
use posix_regex::{PosixRegexBuilder, PosixRegex};
use posix_regex::compile::{Error as CompileError, Token, Range};
/// Compiled pattern handle exchanged with C callers.
///
/// `regcomp` fills every field, `regexec` reads them and `regfree` releases
/// the allocation described by `ptr`/`length`/`capacity`.
#[repr(C)]
pub struct regex_t {
// Can't be a normal Vec<T> because then the struct size won't be known
// from C.
// `ptr`, `length` and `capacity` are the raw parts of the vector of compiled
// branches that `regcomp` leaks with `mem::forget`.
ptr: *mut c_void,
length: size_t,
capacity: size_t,
// Flags given to `regcomp`; `regexec` ORs them with its own `eflags`.
cflags: c_int,
// Number of groups found in the pattern, as computed by `count_groups`.
re_nsub: size_t
}
/// One entry of the `pmatch` array written by `regexec`.
///
/// Slots beyond the groups reported by the matcher have both fields set to
/// `!0` (all bits set).
#[repr(C)]
pub struct regmatch_t {
// Start offset of the match within the input passed to `regexec`.
rm_so: regoff_t,
// End offset of the match within the input passed to `regexec`.
rm_eo: regoff_t
}
// Flag bits. `regexec` ORs the flags stored by `regcomp` with its own
// `eflags`, so each bit below may be supplied to either call.
// Not implemented: `regcomp` and `regexec` return REG_ENOSYS when it is set.
pub const REG_EXTENDED: c_int = 1;
// Forwarded to the matcher's `case_insensitive` option.
pub const REG_ICASE: c_int = 2;
// Suppresses filling of the `pmatch` array in `regexec`.
pub const REG_NOSUB: c_int = 4;
// Forwarded to the matcher's `newline` option.
pub const REG_NEWLINE: c_int = 8;
// Forwarded to the matcher's `no_start` option.
pub const REG_NOTBOL: c_int = 16;
// Forwarded to the matcher's `no_end` option.
pub const REG_NOTEOL: c_int = 32;
// Status codes returned by `regcomp`/`regexec` (0 means success) and turned
// into text by `regerror`. They live in a separate numbering space from the
// flag bits above, so the overlapping values are intentional.
pub const REG_NOMATCH: c_int = 1;
pub const REG_BADPAT: c_int = 2;
pub const REG_ECOLLATE: c_int = 3;
pub const REG_ECTYPE: c_int = 4;
pub const REG_EESCAPE: c_int = 5;
pub const REG_ESUBREG: c_int = 6;
pub const REG_EBRACK: c_int = 7;
pub const REG_ENOSYS: c_int = 8;
pub const REG_EPAREN: c_int = 9;
pub const REG_EBRACE: c_int = 10;
pub const REG_BADBR: c_int = 11;
pub const REG_ERANGE: c_int = 12;
pub const REG_ESPACE: c_int = 13;
pub const REG_BADRPT: c_int = 14;
/// Counts every `Token::Group` reachable from `branches`, including groups
/// nested inside other groups.
fn count_groups(branches: &[Vec<(Token, Range)>]) -> usize {
    branches
        .iter()
        .flat_map(|branch| branch.iter())
        .map(|(token, _)| match token {
            // A group counts once for itself plus whatever it contains.
            Token::Group(ref nested) => 1 + count_groups(nested),
            _ => 0,
        })
        .sum::<usize>()
}
/// Compiles the NUL-terminated pattern `pat` and stores the result in `*out`.
///
/// Returns 0 on success. On failure a `REG_*` status code is returned and
/// `*out` is left untouched. Patterns requesting `REG_EXTENDED` are rejected
/// with `REG_ENOSYS`.
#[no_mangle]
pub extern "C" fn regcomp(out: *mut regex_t, pat: *const c_char, cflags: c_int) -> c_int {
    // Extended syntax is not supported.
    if cflags & REG_EXTENDED != 0 {
        return REG_ENOSYS;
    }

    let pattern = unsafe { slice::from_raw_parts(pat as *const u8, strlen(pat)) };
    let compiled = PosixRegexBuilder::new(pattern)
        .with_default_classes()
        .compile_tokens();

    let mut branches = match compiled {
        Ok(branches) => branches,
        Err(error) => {
            // Translate the compiler's error into the closest status code.
            return match error {
                CompileError::EmptyRepetition
                | CompileError::IntegerOverflow
                | CompileError::IllegalRange => REG_BADBR,
                CompileError::UnclosedRepetition => REG_EBRACE,
                CompileError::LeadingRepetition => REG_BADRPT,
                CompileError::UnknownCollation => REG_ECOLLATE,
                CompileError::UnknownClass(_) => REG_ECTYPE,
                _ => REG_BADPAT,
            };
        }
    };

    // Split the vector into raw parts so that it fits the C-visible struct.
    let handle = regex_t {
        re_nsub: count_groups(&branches),
        ptr: branches.as_mut_ptr() as *mut c_void,
        length: branches.len(),
        capacity: branches.capacity(),
        cflags,
    };
    unsafe {
        *out = handle;
    }
    // Ownership now lives in `*out`; `regfree` rebuilds and drops the vector.
    mem::forget(branches);
    0
}
/// Releases the compiled pattern that `regcomp` stored in `*regex`.
///
/// # Safety
///
/// `regex` must point to a `regex_t` filled by a successful `regcomp` call
/// that has not been freed yet.
#[no_mangle]
pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
    let handle = &*regex;
    // Reassemble the vector leaked by `regcomp`; dropping it frees the memory.
    let branches = Vec::from_raw_parts(
        handle.ptr as *mut Vec<(Token, Range)>,
        handle.length,
        handle.capacity,
    );
    drop(branches);
}
/// Matches the NUL-terminated `input` against the pattern compiled into
/// `*regex`.
///
/// Returns 0 when a match is found, `REG_NOMATCH` when none is found and
/// `REG_ENOSYS` when `REG_EXTENDED` is requested through `eflags`.
///
/// On a match, up to `nmatch` entries of `pmatch` are filled with the offsets
/// of the whole match followed by its groups; remaining entries get `!0` in
/// both fields. Nothing is written when `pmatch` is null, `nmatch` is 0 or
/// `REG_NOSUB` was given to either `regcomp` or this call.
#[no_mangle]
pub extern "C" fn regexec(regex: *const regex_t, input: *const c_char,
                          nmatch: size_t, pmatch: *mut regmatch_t, eflags: c_int) -> c_int {
    if eflags & REG_EXTENDED == REG_EXTENDED {
        return REG_ENOSYS;
    }

    let regex = unsafe { &(*regex) };

    // Allow specifying a compiler argument to the executor and vice versa
    // because why not?
    let flags = regex.cflags | eflags;

    let input = unsafe { slice::from_raw_parts(input as *const u8, strlen(input)) };
    let branches = unsafe { slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length) };

    // Only the first match is of interest.
    let matches = PosixRegex::new(Cow::Borrowed(&branches))
        .case_insensitive(flags & REG_ICASE == REG_ICASE)
        .newline(flags & REG_NEWLINE == REG_NEWLINE)
        .no_start(flags & REG_NOTBOL == REG_NOTBOL)
        .no_end(flags & REG_NOTEOL == REG_NOTEOL)
        .matches(input, Some(1));

    if matches.is_empty() {
        return REG_NOMATCH;
    }

    // REG_NOSUB is normally supplied to `regcomp`, so it must be looked up in
    // the merged flags rather than in `eflags` alone.
    if flags & REG_NOSUB != REG_NOSUB && !pmatch.is_null() && nmatch > 0 {
        let first = &matches[0];
        let len = first.len().min(nmatch as usize);

        for i in 0..len {
            let (start, end) = first[i];
            unsafe {
                *pmatch.offset(i as isize) = regmatch_t {
                    rm_so: start,
                    rm_eo: end
                };
            }
        }
        // Mark the slots the matcher did not report as unused.
        for i in len as isize..nmatch as isize {
            unsafe {
                *pmatch.offset(i) = regmatch_t {
                    rm_so: !0,
                    rm_eo: !0
                };
            }
        }
    }

    0
}
/// Writes a human-readable description of the status `code` into `out`.
///
/// At most `max` bytes are written and the result is always NUL-terminated,
/// truncating the message if the buffer is too small. Nothing is written when
/// `out` is null or `max` is not positive. `_regex` is unused.
///
/// NOTE(review): POSIX declares the size parameter and the return value as
/// `size_t`; the signature is left unchanged here to keep the exported
/// interface stable.
#[no_mangle]
pub extern "C" fn regerror(code: c_int, _regex: *const regex_t, out: *mut c_char, max: c_int) {
    let string = match code {
        0 => "No error\0",
        REG_NOMATCH => "No match\0",
        REG_BADPAT => "Invalid regexp\0",
        REG_ECOLLATE => "Unknown collating element\0",
        REG_ECTYPE => "Unknown character class name\0",
        REG_EESCAPE => "Trailing backslash\0",
        REG_ESUBREG => "Invalid back reference\0",
        REG_EBRACK => "Missing ']'\0",
        REG_ENOSYS => "Unsupported operation\0",
        REG_EPAREN => "Missing ')'\0",
        REG_EBRACE => "Missing '}'\0",
        REG_BADBR => "Invalid contents of {}\0",
        REG_ERANGE => "Invalid character range\0",
        REG_ESPACE => "Out of memory\0",
        REG_BADRPT => "Repetition not preceded by valid expression\0",
        _ => "Unknown error\0"
    };

    // A negative `max` would turn into a huge length when cast to usize, so
    // reject it (and a missing buffer) up front.
    if out.is_null() || max <= 0 {
        return;
    }

    // `len` is at least 1: `max` is positive and every message holds a NUL.
    let len = string.len().min(max as usize);
    unsafe {
        ptr::copy_nonoverlapping(string.as_ptr(), out as *mut u8, len);
        // When the message was truncated the copied bytes lack a terminator;
        // when it was not, this rewrites the NUL that is already there.
        *out.offset(len as isize - 1) = 0;
    }
}
......@@ -21,6 +21,7 @@ extern crate cbitset;
extern crate core_io;
#[macro_use]
extern crate lazy_static;
extern crate posix_regex;
extern crate rand;
extern crate va_list;
......
......@@ -46,6 +46,7 @@ pub type wchar_t = i32;
pub type wint_t = u32;
pub type wctype_t = i64;
pub type regoff_t = size_t;
pub type off_t = c_long;
pub type mode_t = c_int;
pub type time_t = c_long;
......
......@@ -11,6 +11,7 @@ EXPECT_BINS=\
locale \
math \
netdb \
regex \
select \
setjmp \
signal \
......
Matching group: 25 - 36
Matching group: 31 - 36
Matching group: -1 - -1
#include <regex.h>
#include <stdio.h>
/*
 * Compiles a basic regular expression with one group, runs it against a
 * sample sentence and prints the offsets of the whole match and of each
 * requested group.
 */
int main() {
    regex_t regex;
    char error_buf[256];

    int error = regcomp(&regex, "h.llo \\(w.rld\\)", REG_ICASE);
    if (error) {
        regerror(error, &regex, error_buf, 255);
        error_buf[255] = 0;
        printf("regcomp error: %d = %s\n", error, error_buf);
        return -1;
    }

    regmatch_t matches[3] = { 0 };

    error = regexec(&regex, "Hey, how are you? Hello? Hallo Wurld??", 3, matches, 0);
    if (error) {
        /* Describe the error while the compiled pattern is still valid. */
        regerror(error, &regex, error_buf, 255);
        error_buf[255] = 0;
        regfree(&regex);
        printf("regexec error: %d = %s\n", error, error_buf);
        return -1;
    }
    regfree(&regex);

    for (int group = 0; group < 3; group += 1) {
        /* regoff_t is not an int; cast so the arguments match "%d". */
        printf("Matching group: %d - %d\n", (int) matches[group].rm_so, (int) matches[group].rm_eo);
    }
    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment