Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
posix-regex
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
redox-os
posix-regex
Compare revisions
bf7d93bb7aba406dc135a7a9eb031371e7739fe6 to 0d996efe5cfe7ce181af35d8817ac4deae644d4a
Compare revisions
Changes are shown as if the
source
revision was being merged into the
target
revision.
Learn more about comparing revisions.
Source
redox-os/posix-regex
Select target project
No results found
0d996efe5cfe7ce181af35d8817ac4deae644d4a
Select Git revision
Branches
master
Tags
0.1.0
0.1.1
Swap
Target
redox-os/posix-regex
Select target project
gmacd/posix-regex
darley/posix-regex
redox-os/posix-regex
mati865/posix-regex
Ramla-I/posix-regex
devnexen/posix-regex
lygstate/posix-regex
ayf/posix-regex
KGrewal1/posix-regex
josh/posix-regex
10 results
bf7d93bb7aba406dc135a7a9eb031371e7739fe6
Select Git revision
Branches
master
Tags
0.1.0
0.1.1
Show changes
Only incoming changes from source
Include changes to target since source was created
Compare
Commits on Source (2)
Stabilize the number of groups being returned
· 51369b25
jD91mZM2
authored
6 years ago
Verified
51369b25
Fix group 0 with inexact matches
· 0d996efe
jD91mZM2
authored
6 years ago
Verified
0d996efe
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/compile.rs
+24
-12
24 additions, 12 deletions
src/compile.rs
src/matcher.rs
+109
-88
109 additions, 88 deletions
src/matcher.rs
with
133 additions
and
100 deletions
src/compile.rs
View file @
0d996efe
...
@@ -46,7 +46,10 @@ pub enum Token {
...
@@ -46,7 +46,10 @@ pub enum Token {
Any
,
Any
,
Char
(
u8
),
Char
(
u8
),
End
,
End
,
Group
(
Vec
<
Vec
<
(
Token
,
Range
)
>>
),
Group
{
id
:
usize
,
branches
:
Vec
<
Vec
<
(
Token
,
Range
)
>>
},
OneOf
{
OneOf
{
invert
:
bool
,
invert
:
bool
,
list
:
Vec
<
Collation
>
list
:
Vec
<
Collation
>
...
@@ -63,7 +66,7 @@ impl fmt::Debug for Token {
...
@@ -63,7 +66,7 @@ impl fmt::Debug for Token {
Token
::
Any
=>
write!
(
f
,
"."
),
Token
::
Any
=>
write!
(
f
,
"."
),
Token
::
Char
(
c
)
=>
write!
(
f
,
"{:?}"
,
c
as
char
),
Token
::
Char
(
c
)
=>
write!
(
f
,
"{:?}"
,
c
as
char
),
Token
::
End
=>
write!
(
f
,
"$"
),
Token
::
End
=>
write!
(
f
,
"$"
),
Token
::
Group
(
ref
inner
)
=>
write!
(
f
,
"Group({:?})"
,
inner
),
Token
::
Group
{
ref
branches
,
..
}
=>
write!
(
f
,
"Group({:?})"
,
branches
),
Token
::
OneOf
{
invert
,
ref
list
}
=>
write!
(
f
,
"[invert: {}; {:?}]"
,
invert
,
list
),
Token
::
OneOf
{
invert
,
ref
list
}
=>
write!
(
f
,
"[invert: {}; {:?}]"
,
invert
,
list
),
Token
::
Start
=>
write!
(
f
,
"^"
),
Token
::
Start
=>
write!
(
f
,
"^"
),
Token
::
WordEnd
=>
write!
(
f
,
">"
),
Token
::
WordEnd
=>
write!
(
f
,
">"
),
...
@@ -89,14 +92,16 @@ pub enum Error {
...
@@ -89,14 +92,16 @@ pub enum Error {
/// A regex builder struct
/// A regex builder struct
pub
struct
PosixRegexBuilder
<
'a
>
{
pub
struct
PosixRegexBuilder
<
'a
>
{
input
:
&
'a
[
u8
],
input
:
&
'a
[
u8
],
classes
:
HashMap
<&
'a
[
u8
],
fn
(
u8
)
->
bool
>
classes
:
HashMap
<&
'a
[
u8
],
fn
(
u8
)
->
bool
>
,
group_id
:
usize
}
}
impl
<
'a
>
PosixRegexBuilder
<
'a
>
{
impl
<
'a
>
PosixRegexBuilder
<
'a
>
{
/// Create a new instance that is ready to parse the regex `input`
/// Create a new instance that is ready to parse the regex `input`
pub
fn
new
(
input
:
&
'a
[
u8
])
->
Self
{
pub
fn
new
(
input
:
&
'a
[
u8
])
->
Self
{
Self
{
Self
{
input
,
input
,
classes
:
HashMap
::
new
()
classes
:
HashMap
::
new
(),
group_id
:
1
}
}
}
}
/// Add a custom collation class, for use within square brackets (such as `[[:digit:]]`)
/// Add a custom collation class, for use within square brackets (such as `[[:digit:]]`)
...
@@ -125,7 +130,7 @@ impl<'a> PosixRegexBuilder<'a> {
...
@@ -125,7 +130,7 @@ impl<'a> PosixRegexBuilder<'a> {
self
self
}
}
/// "Compile" this regex to a struct ready to match input
/// "Compile" this regex to a struct ready to match input
pub
fn
compile
(
&
mut
self
)
->
Result
<
PosixRegex
<
'static
>
,
Error
>
{
pub
fn
compile
(
mut
self
)
->
Result
<
PosixRegex
<
'static
>
,
Error
>
{
let
search
=
self
.compile_tokens
()
?
;
let
search
=
self
.compile_tokens
()
?
;
Ok
(
PosixRegex
::
new
(
Cow
::
Owned
(
search
)))
Ok
(
PosixRegex
::
new
(
Cow
::
Owned
(
search
)))
}
}
...
@@ -238,7 +243,14 @@ impl<'a> PosixRegexBuilder<'a> {
...
@@ -238,7 +243,14 @@ impl<'a> PosixRegexBuilder<'a> {
}
}
},
},
b'\\'
=>
match
self
.next
()
?
{
b'\\'
=>
match
self
.next
()
?
{
b'('
=>
Token
::
Group
(
self
.compile_tokens
()
?
),
b'('
=>
{
let
id
=
self
.group_id
;
self
.group_id
+=
1
;
Token
::
Group
{
id
,
branches
:
self
.compile_tokens
()
?
}
},
b')'
=>
{
b')'
=>
{
alternatives
.push
(
chain
);
alternatives
.push
(
chain
);
return
Ok
(
alternatives
);
return
Ok
(
alternatives
);
...
@@ -327,19 +339,19 @@ mod tests {
...
@@ -327,19 +339,19 @@ mod tests {
}
}
#[test]
#[test]
fn
groups
()
{
fn
groups
()
{
assert_eq!
(
compile
(
br"\(abc\|bcd\|cde\)"
),
&
[
t
(
Token
::
Group
(
vec!
[
assert_eq!
(
compile
(
br"\(abc\|bcd\|cde\)"
),
&
[
t
(
Token
::
Group
{
id
:
1
,
branches
:
vec!
[
vec!
[
c
(
b'a'
),
c
(
b'b'
),
c
(
b'c'
)],
vec!
[
c
(
b'a'
),
c
(
b'b'
),
c
(
b'c'
)],
vec!
[
c
(
b'b'
),
c
(
b'c'
),
c
(
b'd'
)],
vec!
[
c
(
b'b'
),
c
(
b'c'
),
c
(
b'd'
)],
vec!
[
c
(
b'c'
),
c
(
b'd'
),
c
(
b'e'
)]
vec!
[
c
(
b'c'
),
c
(
b'd'
),
c
(
b'e'
)]
]
)
)]);
]
}
)]);
assert_eq!
(
compile
(
br"\(abc\|\(bcd\|cde\)\)"
),
&
[
assert_eq!
(
compile
(
br"\(abc\|\(bcd\|cde\)\)"
),
&
[
t
(
Token
::
Group
(
vec!
[
t
(
Token
::
Group
{
id
:
1
,
branches
:
vec!
[
vec!
[
c
(
b'a'
),
c
(
b'b'
),
c
(
b'c'
)],
vec!
[
c
(
b'a'
),
c
(
b'b'
),
c
(
b'c'
)],
vec!
[
t
(
Token
::
Group
(
vec!
[
vec!
[
t
(
Token
::
Group
{
id
:
2
,
branches
:
vec!
[
vec!
[
c
(
b'b'
),
c
(
b'c'
),
c
(
b'd'
)],
vec!
[
c
(
b'b'
),
c
(
b'c'
),
c
(
b'd'
)],
vec!
[
c
(
b'c'
),
c
(
b'd'
),
c
(
b'e'
)]
vec!
[
c
(
b'c'
),
c
(
b'd'
),
c
(
b'e'
)]
]
)
)]
]
}
)]
]
)
)
]
}
)
]);
]);
}
}
#[test]
#[test]
...
...
This diff is collapsed.
Click to expand it.
src/matcher.rs
View file @
0d996efe
...
@@ -6,8 +6,8 @@ use std::prelude::*;
...
@@ -6,8 +6,8 @@ use std::prelude::*;
use
compile
::{
Token
,
Range
};
use
compile
::{
Token
,
Range
};
use
ctype
;
use
ctype
;
use
std
::
borrow
::
Cow
;
use
std
::
borrow
::
Cow
;
use
std
::
fmt
;
use
std
::
rc
::
Rc
;
use
std
::
rc
::
Rc
;
use
std
::{
fmt
,
mem
};
/// A regex matcher, ready to match stuff
/// A regex matcher, ready to match stuff
#[derive(Clone)]
#[derive(Clone)]
...
@@ -57,62 +57,83 @@ impl<'a> PosixRegex<'a> {
...
@@ -57,62 +57,83 @@ impl<'a> PosixRegex<'a> {
self
.no_end
=
value
;
self
.no_end
=
value
;
self
self
}
}
/// Return the total number of matches that **will** be returned by
/// `matches_exact` or in each match in `matches`.
pub
fn
count_groups
(
&
self
)
->
usize
{
let
mut
count
=
1
;
for
branch
in
&*
self
.branches
{
count
+=
count_groups
(
branch
);
}
count
}
/// Match the string starting at the current position. This does not find
/// Match the string starting at the current position. This does not find
/// substrings.
/// substrings.
pub
fn
matches_exact
(
&
self
,
input
:
&
[
u8
])
->
Option
<
Vec
<
(
usize
,
usize
)
>>
{
pub
fn
matches_exact
(
&
self
,
input
:
&
[
u8
])
->
Option
<
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>>
{
let
mut
groups
=
Vec
::
new
();
let
mut
matcher
=
PosixRegexMatcher
{
let
mut
matcher
=
PosixRegexMatcher
{
base
:
self
,
base
:
self
,
input
,
input
,
offset
:
0
,
offset
:
0
groups
:
&
mut
groups
};
};
let
branches
=
self
.branches
.iter
()
let
branches
=
self
.branches
.iter
()
.filter_map
(|
tokens
|
Branch
::
new
(
tokens
))
.filter_map
(|
tokens
|
Branch
::
new
(
true
,
tokens
))
.collect
();
.collect
();
matcher
.groups
.push
((
matcher
.offset
,
0
));
let
start
=
matcher
.offset
;
if
!
matcher
.matches_exact
(
branches
)
{
match
matcher
.matches_exact
(
branches
)
{
return
None
;
None
=>
None
,
Some
(
mut
groups
)
=>
{
assert_eq!
(
groups
[
0
],
None
);
groups
[
0
]
=
Some
((
start
,
matcher
.offset
));
Some
(
groups
)
}
}
}
groups
[
0
]
.1
=
matcher
.offset
;
Some
(
groups
)
}
}
/// Match any substrings in the string, but optionally no more than `max`
/// Match any substrings in the string, but optionally no more than `max`
pub
fn
matches
(
&
self
,
input
:
&
[
u8
],
mut
max
:
Option
<
usize
>
)
->
Vec
<
Vec
<
(
usize
,
usize
)
>>
{
pub
fn
matches
(
&
self
,
input
:
&
[
u8
],
mut
max
:
Option
<
usize
>
)
->
Vec
<
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>>
{
let
mut
groups
=
Vec
::
new
();
let
mut
matcher
=
PosixRegexMatcher
{
let
mut
matcher
=
PosixRegexMatcher
{
base
:
self
,
base
:
self
,
input
,
input
,
offset
:
0
,
offset
:
0
groups
:
&
mut
groups
};
};
let
tokens
=
vec!
[
let
tokens
=
vec!
[
(
Token
::
InternalStart
,
Range
(
0
,
None
)),
(
Token
::
InternalStart
,
Range
(
0
,
None
)),
(
Token
::
Group
(
self
.branches
.to_vec
()
)
,
Range
(
1
,
Some
(
1
)))
(
Token
::
Group
{
id
:
0
,
branches
:
self
.branches
.to_vec
()
}
,
Range
(
1
,
Some
(
1
)))
];
];
let
branches
=
vec!
[
let
branches
=
vec!
[
Branch
::
new
(
&
tokens
)
.unwrap
()
Branch
::
new
(
false
,
&
tokens
)
.unwrap
()
];
];
let
mut
matches
=
Vec
::
new
();
let
mut
matches
=
Vec
::
new
();
while
max
.map
(|
max
|
max
>
0
)
.unwrap_or
(
true
)
&&
matcher
.matches_exact
(
branches
.clone
())
{
while
max
.map
(|
max
|
max
>
0
)
.unwrap_or
(
true
)
{
matches
.push
(
mem
::
replace
(
matcher
.groups
,
Vec
::
new
()));
match
matcher
.matches_exact
(
branches
.clone
())
{
Some
(
groups
)
=>
matches
.push
(
groups
),
None
=>
break
}
max
=
max
.map
(|
max
|
max
-
1
);
max
=
max
.map
(|
max
|
max
-
1
);
}
}
matches
matches
}
}
}
}
fn
count_groups
(
tokens
:
&
[(
Token
,
Range
)])
->
usize
{
let
mut
groups
=
0
;
for
(
token
,
_
)
in
tokens
{
if
let
Token
::
Group
{
ref
branches
,
..
}
=
token
{
groups
+=
1
;
for
branch
in
branches
{
groups
+=
count_groups
(
branch
);
}
}
}
groups
}
#[derive(Debug,
Clone,
PartialEq,
Eq)]
#[derive(Debug,
Clone,
PartialEq,
Eq)]
struct
Group
{
struct
Group
{
index
:
usize
,
index
:
usize
,
variant
:
usize
,
variant
:
usize
,
id
:
usize
start
:
usize
,
end
:
usize
}
}
#[derive(Clone)]
#[derive(Clone)]
...
@@ -121,7 +142,7 @@ struct Branch<'a> {
...
@@ -121,7 +142,7 @@ struct Branch<'a> {
repeated
:
u32
,
repeated
:
u32
,
tokens
:
&
'a
[(
Token
,
Range
)],
tokens
:
&
'a
[(
Token
,
Range
)],
path
:
Box
<
[
Group
]
>
,
path
:
Box
<
[
Group
]
>
,
prev
:
Vec
<
(
Box
<
[(
usize
,
usize
)]
>
,
(
usize
,
usize
))
>
,
prev
:
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>
,
parent
:
Option
<
Rc
<
Branch
<
'a
>>>
parent
:
Option
<
Rc
<
Branch
<
'a
>>>
}
}
...
@@ -134,7 +155,7 @@ impl<'a> fmt::Debug for Branch<'a> {
...
@@ -134,7 +155,7 @@ impl<'a> fmt::Debug for Branch<'a> {
}
}
}
}
impl
<
'a
>
Branch
<
'a
>
{
impl
<
'a
>
Branch
<
'a
>
{
fn
new
(
tokens
:
&
'a
[(
Token
,
Range
)])
->
Option
<
Self
>
{
fn
new
(
exact
:
bool
,
tokens
:
&
'a
[(
Token
,
Range
)])
->
Option
<
Self
>
{
if
tokens
.is_empty
()
{
if
tokens
.is_empty
()
{
return
None
;
return
None
;
}
}
...
@@ -143,14 +164,14 @@ impl<'a> Branch<'a> {
...
@@ -143,14 +164,14 @@ impl<'a> Branch<'a> {
repeated
:
0
,
repeated
:
0
,
tokens
:
tokens
,
tokens
:
tokens
,
path
:
Box
::
new
([]),
path
:
Box
::
new
([]),
prev
:
Vec
::
new
(),
prev
:
vec!
[
None
;
if
exact
{
1
}
else
{
0
}
+
count_groups
(
tokens
)]
.into_boxed_slice
(),
parent
:
None
parent
:
None
})
})
}
}
fn
group
(
fn
group
(
path
:
Box
<
[
Group
]
>
,
path
:
Box
<
[
Group
]
>
,
prev
:
Vec
<
(
Box
<
[(
usize
,
usize
)]
>
,
(
usize
,
usize
))
>
,
prev
:
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>
,
tokens
:
&
'a
[(
Token
,
Range
)],
tokens
:
&
'a
[(
Token
,
Range
)],
mut
parent
:
Branch
<
'a
>
mut
parent
:
Branch
<
'a
>
)
->
Option
<
Self
>
{
)
->
Option
<
Self
>
{
...
@@ -174,7 +195,7 @@ impl<'a> Branch<'a> {
...
@@ -174,7 +195,7 @@ impl<'a> Branch<'a> {
if
len
>
0
{
if
len
>
0
{
for
group
in
&
self
.path
[
..
len
-
1
]
{
for
group
in
&
self
.path
[
..
len
-
1
]
{
match
tokens
[
group
.index
]
{
match
tokens
[
group
.index
]
{
(
Token
::
Group
(
ref
inner
)
,
_
)
=>
tokens
=
&
inner
[
group
.variant
],
(
Token
::
Group
{
ref
branches
,
..
}
,
_
)
=>
tokens
=
&
branches
[
group
.variant
],
_
=>
panic!
(
"non-group index in path"
)
_
=>
panic!
(
"non-group index in path"
)
}
}
}
}
...
@@ -187,7 +208,7 @@ impl<'a> Branch<'a> {
...
@@ -187,7 +208,7 @@ impl<'a> Branch<'a> {
if
let
Some
(
group
)
=
self
.path
.last
()
{
if
let
Some
(
group
)
=
self
.path
.last
()
{
match
tokens
[
group
.index
]
{
match
tokens
[
group
.index
]
{
(
Token
::
Group
(
ref
inner
)
,
_
)
=>
tokens
=
&
inner
[
group
.variant
],
(
Token
::
Group
{
ref
branches
,
..
}
,
_
)
=>
tokens
=
&
branches
[
group
.variant
],
_
=>
panic!
(
"non-group index in path"
)
_
=>
panic!
(
"non-group index in path"
)
}
}
}
}
...
@@ -199,20 +220,13 @@ impl<'a> Branch<'a> {
...
@@ -199,20 +220,13 @@ impl<'a> Branch<'a> {
}
}
fn
update_group_end
(
&
mut
self
,
offset
:
usize
)
{
fn
update_group_end
(
&
mut
self
,
offset
:
usize
)
{
for
group
in
&
mut
*
self
.path
{
for
group
in
&
mut
*
self
.path
{
group
.end
=
offset
;
self
.prev
[
group
.id
]
.as_mut
()
.unwrap
()
.1
=
offset
;
}
}
}
}
fn
push_to_prev
(
&
self
,
prev
:
&
mut
Vec
<
(
Box
<
[(
usize
,
usize
)]
>
,
(
usize
,
usize
))
>
)
{
fn
extend
(
&
self
,
prev
:
&
mut
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>
)
{
for
i
in
0
..
self
.path
.len
()
{
for
(
i
,
&
group
)
in
self
.prev
.iter
()
.enumerate
()
{
let
key
:
Vec
<
_
>
=
self
.path
[
..=
i
]
.iter
()
.map
(|
g
|
(
g
.index
,
g
.variant
))
.collect
();
if
group
.is_some
()
{
let
key
=
key
.into
();
prev
[
i
]
=
group
;
let
group
=
&
self
.path
[
i
];
let
value
=
(
group
.start
,
group
.end
);
if
let
Some
(
slot
)
=
prev
.iter_mut
()
.find
(|(
key2
,
_
)|
key
==
*
key2
)
{
*
slot
=
(
key
,
value
);
}
else
{
prev
.push
((
key
,
value
));
}
}
}
}
}
}
...
@@ -227,14 +241,7 @@ impl<'a> Branch<'a> {
...
@@ -227,14 +241,7 @@ impl<'a> Branch<'a> {
if
let
Some
(
mut
next
)
=
parent
.next_branch
()
{
if
let
Some
(
mut
next
)
=
parent
.next_branch
()
{
// Group is closing, migrate previous & current groups to next.
// Group is closing, migrate previous & current groups to next.
for
(
key
,
value
)
in
&
self
.prev
{
self
.extend
(
&
mut
next
.prev
);
if
let
Some
(
slot
)
=
next
.prev
.iter_mut
()
.find
(|(
key2
,
_
)|
key
==
key2
)
{
*
slot
=
(
key
.clone
(),
value
.clone
());
}
else
{
next
.prev
.push
((
key
.clone
(),
value
.clone
()));
}
}
self
.push_to_prev
(
&
mut
next
.prev
);
return
Some
(
next
);
return
Some
(
next
);
}
}
...
@@ -249,20 +256,22 @@ impl<'a> Branch<'a> {
...
@@ -249,20 +256,22 @@ impl<'a> Branch<'a> {
fn
add_repeats
(
&
self
,
branches
:
&
mut
Vec
<
Branch
<
'a
>>
,
offset
:
usize
)
{
fn
add_repeats
(
&
self
,
branches
:
&
mut
Vec
<
Branch
<
'a
>>
,
offset
:
usize
)
{
let
mut
branch
=
self
;
let
mut
branch
=
self
;
loop
{
loop
{
if
let
(
Token
::
Group
(
ref
alternatives
)
,
Range
(
_
,
max
))
=
*
branch
.get_token
()
{
if
let
(
Token
::
Group
{
id
,
branches
:
ref
alternatives
}
,
Range
(
_
,
max
))
=
*
branch
.get_token
()
{
if
max
.map
(|
max
|
branch
.repeated
<
max
)
.unwrap_or
(
true
)
{
if
max
.map
(|
max
|
branch
.repeated
<
max
)
.unwrap_or
(
true
)
{
for
alternative
in
0
..
alternatives
.len
()
{
for
alternative
in
0
..
alternatives
.len
()
{
let
mut
path
=
branch
.path
.to_vec
();
let
mut
path
=
branch
.path
.to_vec
();
path
.push
(
Group
{
path
.push
(
Group
{
start
:
offset
,
variant
:
alternative
,
variant
:
alternative
,
index
:
branch
.index
,
index
:
branch
.index
,
end
:
0
id
});
});
let
mut
prev
=
self
.prev
.clone
();
prev
[
id
]
.get_or_insert
((
0
,
0
))
.0
=
offset
;
if
let
Some
(
group
)
=
Branch
::
group
(
if
let
Some
(
group
)
=
Branch
::
group
(
path
.into_boxed_slice
(),
path
.into_boxed_slice
(),
branch
.prev
.clone
()
,
prev
,
branch
.tokens
,
branch
.tokens
,
branch
.clone
()
branch
.clone
()
)
{
)
{
...
@@ -313,8 +322,7 @@ impl<'a> Branch<'a> {
...
@@ -313,8 +322,7 @@ impl<'a> Branch<'a> {
struct
PosixRegexMatcher
<
'a
>
{
struct
PosixRegexMatcher
<
'a
>
{
base
:
&
'a
PosixRegex
<
'a
>
,
base
:
&
'a
PosixRegex
<
'a
>
,
input
:
&
'a
[
u8
],
input
:
&
'a
[
u8
],
offset
:
usize
,
offset
:
usize
groups
:
&
'a
mut
Vec
<
(
usize
,
usize
)
>
}
}
impl
<
'a
>
PosixRegexMatcher
<
'a
>
{
impl
<
'a
>
PosixRegexMatcher
<
'a
>
{
fn
expand
<
'b
>
(
&
mut
self
,
branches
:
&
mut
[
Branch
<
'b
>
])
->
Vec
<
Branch
<
'b
>>
{
fn
expand
<
'b
>
(
&
mut
self
,
branches
:
&
mut
[
Branch
<
'b
>
])
->
Vec
<
Branch
<
'b
>>
{
...
@@ -325,20 +333,22 @@ impl<'a> PosixRegexMatcher<'a> {
...
@@ -325,20 +333,22 @@ impl<'a> PosixRegexMatcher<'a> {
let
(
ref
token
,
range
)
=
*
branch
.get_token
();
let
(
ref
token
,
range
)
=
*
branch
.get_token
();
if
let
Token
::
Group
(
ref
inner
)
=
token
{
if
let
Token
::
Group
{
id
,
branches
:
ref
inner
}
=
*
token
{
for
alternation
in
0
..
inner
.len
()
{
for
alternation
in
0
..
inner
.len
()
{
let
mut
path
=
Vec
::
with_capacity
(
branch
.path
.len
()
+
1
);
let
mut
path
=
Vec
::
with_capacity
(
branch
.path
.len
()
+
1
);
path
.extend_from_slice
(
&
branch
.path
);
path
.extend_from_slice
(
&
branch
.path
);
path
.push
(
Group
{
path
.push
(
Group
{
index
:
branch
.index
,
index
:
branch
.index
,
variant
:
alternation
,
variant
:
alternation
,
start
:
self
.offset
,
id
end
:
0
});
});
let
mut
prev
=
branch
.prev
.clone
();
prev
[
id
]
.get_or_insert
((
0
,
0
))
.0
=
self
.offset
;
if
let
Some
(
branch
)
=
Branch
::
group
(
if
let
Some
(
branch
)
=
Branch
::
group
(
path
.into
(),
path
.into
(),
branch
.prev
.clone
()
,
prev
,
branch
.tokens
,
branch
.tokens
,
branch
.clone
()
branch
.clone
()
)
{
)
{
...
@@ -362,7 +372,7 @@ impl<'a> PosixRegexMatcher<'a> {
...
@@ -362,7 +372,7 @@ impl<'a> PosixRegexMatcher<'a> {
insert
insert
}
}
fn
matches_exact
(
&
mut
self
,
mut
branches
:
Vec
<
Branch
>
)
->
bool
{
fn
matches_exact
(
&
mut
self
,
mut
branches
:
Vec
<
Branch
>
)
->
Option
<
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>>
{
// Whether or not any branch, at any point, got fully explored. This
// Whether or not any branch, at any point, got fully explored. This
// means at least one path of the regex successfully completed!
// means at least one path of the regex successfully completed!
let
mut
succeeded
=
None
;
let
mut
succeeded
=
None
;
...
@@ -432,7 +442,7 @@ impl<'a> PosixRegexMatcher<'a> {
...
@@ -432,7 +442,7 @@ impl<'a> PosixRegexMatcher<'a> {
// Step 3: Check if the token matches
// Step 3: Check if the token matches
accepts
=
accepts
&&
match
*
token
{
accepts
=
accepts
&&
match
*
token
{
Token
::
InternalStart
=>
next
.is_some
(),
Token
::
InternalStart
=>
next
.is_some
(),
Token
::
Group
(
_
)
=>
false
,
// <- content is already expanded and handled
Token
::
Group
{
..
}
=>
false
,
// <- content is already expanded and handled
Token
::
Any
=>
next
.map
(|
c
|
!
self
.base.newline
||
c
!=
b'\n'
)
.unwrap_or
(
false
),
Token
::
Any
=>
next
.map
(|
c
|
!
self
.base.newline
||
c
!=
b'\n'
)
.unwrap_or
(
false
),
Token
::
Char
(
c
)
=>
if
self
.base.case_insensitive
{
Token
::
Char
(
c
)
=>
if
self
.base.case_insensitive
{
...
@@ -471,16 +481,7 @@ impl<'a> PosixRegexMatcher<'a> {
...
@@ -471,16 +481,7 @@ impl<'a> PosixRegexMatcher<'a> {
if
branches
.is_empty
()
||
if
branches
.is_empty
()
||
// The internal start thing is lazy, not greedy:
// The internal start thing is lazy, not greedy:
(
succeeded
.is_some
()
&&
branches
.iter
()
.all
(|
t
|
t
.get_token
()
.0
==
Token
::
InternalStart
))
{
(
succeeded
.is_some
()
&&
branches
.iter
()
.all
(|
t
|
t
.get_token
()
.0
==
Token
::
InternalStart
))
{
if
let
Some
(
ref
branch
)
=
succeeded
{
return
succeeded
.map
(|
branch
|
branch
.prev
);
// Push the bounds of all successful groups
let
mut
prev
=
branch
.prev
.clone
();
branch
.push_to_prev
(
&
mut
prev
);
for
&
(
_
,
group
)
in
&
prev
{
self
.groups
.push
(
group
);
}
}
return
succeeded
.is_some
();
}
}
if
next
.is_some
()
{
if
next
.is_some
()
{
...
@@ -502,17 +503,29 @@ mod tests {
...
@@ -502,17 +503,29 @@ mod tests {
use
super
::
*
;
use
super
::
*
;
use
::
PosixRegexBuilder
;
use
::
PosixRegexBuilder
;
// FIXME: Workaround to coerce a Box<[T; N]> into a Box<[T]>. Use type
// ascription when stabilized.
fn
boxed_slice
<
T
>
(
slice
:
Box
<
[
T
]
>
)
->
Box
<
[
T
]
>
{
slice
}
macro_rules!
abox
{
(
$
(
$item:expr
),
*
)
=>
{
boxed_slice
(
Box
::
new
([
$
(
$item
),
*
]))
}
}
fn
compile
(
regex
:
&
str
)
->
PosixRegex
{
fn
compile
(
regex
:
&
str
)
->
PosixRegex
{
PosixRegexBuilder
::
new
(
regex
.as_bytes
())
PosixRegexBuilder
::
new
(
regex
.as_bytes
())
.with_default_classes
()
.with_default_classes
()
.compile
()
.compile
()
.expect
(
"error compiling regex"
)
.expect
(
"error compiling regex"
)
}
}
fn
matches
(
regex
:
&
str
,
input
:
&
str
)
->
Vec
<
Vec
<
(
usize
,
usize
)
>>
{
fn
matches
(
regex
:
&
str
,
input
:
&
str
)
->
Vec
<
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>
>
{
compile
(
regex
)
compile
(
regex
)
.matches
(
input
.as_bytes
(),
None
)
.matches
(
input
.as_bytes
(),
None
)
}
}
fn
matches_exact
(
regex
:
&
str
,
input
:
&
str
)
->
Option
<
Vec
<
(
usize
,
usize
)
>>
{
fn
matches_exact
(
regex
:
&
str
,
input
:
&
str
)
->
Option
<
Box
<
[
Option
<
(
usize
,
usize
)
>
]
>
>
{
compile
(
regex
)
compile
(
regex
)
.matches_exact
(
input
.as_bytes
())
.matches_exact
(
input
.as_bytes
())
}
}
...
@@ -570,55 +583,63 @@ mod tests {
...
@@ -570,55 +583,63 @@ mod tests {
fn
offsets
()
{
fn
offsets
()
{
assert_eq!
(
assert_eq!
(
matches_exact
(
"abc"
,
"abcd"
),
matches_exact
(
"abc"
,
"abcd"
),
Some
(
vec!
[
(
0
,
3
)])
Some
(
abox!
[
Some
(
(
0
,
3
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"[[:alpha:]]\+"
,
"abcde12345"
),
matches_exact
(
r"[[:alpha:]]\+"
,
"abcde12345"
),
Some
(
vec!
[
(
0
,
5
)])
Some
(
abox!
[
Some
(
(
0
,
5
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"a\(bc\)\+d"
,
"abcbcd"
),
matches_exact
(
r"a\(bc\)\+d"
,
"abcbcd"
),
Some
(
vec!
[
(
0
,
6
),
(
3
,
5
)])
Some
(
abox!
[
Some
(
(
0
,
6
)
)
,
Some
(
(
3
,
5
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"hello\( \(world\|universe\) :D\)\?!"
,
"hello world :D!"
),
matches_exact
(
r"hello\( \(world\|universe\) :D\)\?!"
,
"hello world :D!"
),
Some
(
vec!
[
(
0
,
15
),
(
5
,
14
),
(
6
,
11
)])
Some
(
abox!
[
Some
(
(
0
,
15
)
)
,
Some
(
(
5
,
14
)
)
,
Some
(
(
6
,
11
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"hello\( \(world\|universe\) :D\)\?"
,
"hello world :D"
),
matches_exact
(
r"hello\( \(world\|universe\) :D\)\?"
,
"hello world :D"
),
Some
(
vec!
[
(
0
,
14
),
(
5
,
14
),
(
6
,
11
)])
Some
(
abox!
[
Some
(
(
0
,
14
)
)
,
Some
(
(
5
,
14
)
)
,
Some
(
(
6
,
11
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"\(\<hello\>\) world"
,
"hello world"
),
matches_exact
(
r"\(\<hello\>\) world"
,
"hello world"
),
Some
(
vec!
[
(
0
,
11
),
(
0
,
5
)])
Some
(
abox!
[
Some
(
(
0
,
11
)
)
,
Some
(
(
0
,
5
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r".*d"
,
"hid howd ared youd"
),
matches_exact
(
r".*d"
,
"hid howd ared youd"
),
Some
(
vec!
[
(
0
,
18
)])
Some
(
abox!
[
Some
(
(
0
,
18
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r".*\(a\)"
,
"bbbbba"
),
matches_exact
(
r".*\(a\)"
,
"bbbbba"
),
Some
(
vec!
[
(
0
,
6
),
(
5
,
6
)])
Some
(
abox!
[
Some
(
(
0
,
6
)
)
,
Some
(
(
5
,
6
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"\(a \(b\) \(c\)\) \(d\)"
,
"a b c d"
),
matches_exact
(
r"\(a \(b\) \(c\)\) \(d\)"
,
"a b c d"
),
Some
(
vec!
[
(
0
,
7
),
(
0
,
5
),
(
2
,
3
),
(
4
,
5
),
(
6
,
7
)])
Some
(
abox!
[
Some
(
(
0
,
7
)
)
,
Some
(
(
0
,
5
)
)
,
Some
(
(
2
,
3
)
)
,
Some
(
(
4
,
5
)
)
,
Some
(
(
6
,
7
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"\(.\)*"
,
"hello"
),
matches_exact
(
r"\(.\)*"
,
"hello"
),
Some
(
vec!
[
(
0
,
5
),
(
4
,
5
)])
Some
(
abox!
[
Some
(
(
0
,
5
)
)
,
Some
(
(
4
,
5
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches
(
"h
i
"
,
"hello hi lol"
),
matches
(
r
"h
\(i\)
"
,
"hello hi lol"
),
vec!
(
vec!
[(
6
,
8
)])
vec!
(
abox!
[
Some
((
6
,
8
)),
Some
((
7
,
8
)
)])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"\(\([[:alpha:]]\)*\)"
,
"abcdefg"
),
matches_exact
(
r"\(\([[:alpha:]]\)*\)"
,
"abcdefg"
),
Some
(
vec!
[
(
0
,
7
),
(
0
,
7
),
(
6
,
7
)])
Some
(
abox!
[
Some
(
(
0
,
7
)
)
,
Some
(
(
0
,
7
)
)
,
Some
(
(
6
,
7
)
)
])
);
);
assert_eq!
(
assert_eq!
(
matches_exact
(
r"\(\.\([[:alpha:]]\)\)*"
,
".a.b.c.d.e.f.g"
),
matches_exact
(
r"\(\.\([[:alpha:]]\)\)*"
,
".a.b.c.d.e.f.g"
),
Some
(
vec!
[(
0
,
14
),
(
12
,
14
),
(
13
,
14
)])
Some
(
abox!
[
Some
((
0
,
14
)),
Some
((
12
,
14
)),
Some
((
13
,
14
))])
);
assert_eq!
(
matches_exact
(
r"\(a\|\(b\)\)*\(c\)"
,
"bababac"
),
Some
(
abox!
[
Some
((
0
,
7
)),
Some
((
5
,
6
)),
Some
((
4
,
5
)),
Some
((
6
,
7
))])
);
assert_eq!
(
matches_exact
(
r"\(a\|\(b\)\)*\(c\)"
,
"aaac"
),
Some
(
abox!
[
Some
((
0
,
4
)),
Some
((
2
,
3
)),
None
,
Some
((
3
,
4
))])
);
);
}
}
#[test]
#[test]
...
@@ -725,7 +746,7 @@ mod tests {
...
@@ -725,7 +746,7 @@ mod tests {
#[bench]
#[bench]
fn
speed_matches
(
b
:
&
mut
Bencher
)
{
fn
speed_matches
(
b
:
&
mut
Bencher
)
{
b
.iter
(||
{
b
.iter
(||
{
assert!
(
matches
(
r"\(\(a*\|b\|c\) test\|yee\)"
,
"oooo aaaaa test"
,
None
)
.len
()
==
1
);
assert
_eq
!
(
matches
(
r"\(\(a*\|b\|c\) test\|yee\)"
,
"oooo aaaaa test"
)
.len
()
,
1
);
})
})
}
}
}
}
This diff is collapsed.
Click to expand it.