Skip to content
Snippets Groups Projects
Commit 198c5502 authored by Nicolás Antinori's avatar Nicolás Antinori Committed by Jeremy Soller
Browse files

swscanf and vswscanf implementation proposal

parent ce818157
No related branches found
No related tags found
No related merge requests found
use super::{fseek_locked, ftell_locked, FILE, SEEK_SET};
use crate::{
io::Read,
platform::types::{off_t, wint_t},
};
struct LookAheadBuffer {
buf: *const wint_t,
pos: isize,
look_ahead: isize,
}
impl LookAheadBuffer {
fn look_ahead(&mut self) -> Result<Option<wint_t>, i32> {
let wchar = unsafe { *self.buf.offset(self.look_ahead) };
if wchar == 0 {
Ok(None)
} else {
self.look_ahead += 1;
Ok(Some(wchar))
}
}
fn commit(&mut self) {
self.pos = self.look_ahead;
}
}
impl From<*const wint_t> for LookAheadBuffer {
fn from(buff: *const wint_t) -> LookAheadBuffer {
LookAheadBuffer {
buf: buff,
pos: 0,
look_ahead: 0,
}
}
}
pub struct LookAheadReader(LookAheadBuffer);
impl LookAheadReader {
pub fn lookahead1(&mut self) -> Result<Option<wint_t>, i32> {
self.0.look_ahead()
}
pub fn commit(&mut self) {
self.0.commit()
}
}
impl From<*const wint_t> for LookAheadReader {
fn from(buff: *const wint_t) -> LookAheadReader {
LookAheadReader(buff.into())
}
}
......@@ -15,8 +15,10 @@ use crate::{
platform::{self, types::*, ERRNO},
};
mod lookaheadreader;
mod utf8;
mod wprintf;
mod wscanf;
#[repr(C)]
#[derive(Clone, Copy)]
......@@ -277,9 +279,23 @@ pub unsafe extern "C" fn putwchar(wc: wchar_t) -> wint_t {
fputwc(wc, &mut *stdout)
}
// #[no_mangle]
pub extern "C" fn swscanf(s: *const wchar_t, format: *const wchar_t, ap: va_list) -> c_int {
unimplemented!();
#[no_mangle]
pub unsafe extern "C" fn vswscanf(
s: *const wchar_t,
format: *const wchar_t,
__valist: va_list,
) -> c_int {
let reader = (s as *const wint_t).into();
wscanf::scanf(reader, format, __valist)
}
#[no_mangle]
pub unsafe extern "C" fn swscanf(
s: *const wchar_t,
format: *const wchar_t,
mut __valist: ...
) -> c_int {
vswscanf(s, format, __valist.as_va_list())
}
/// Push wide character `wc` back onto `stream` so it'll be read next
......
use super::lookaheadreader::LookAheadReader;
use crate::platform::types::*;
use alloc::{string::String, vec::Vec};
use core::ffi::VaList as va_list;
#[derive(PartialEq, Eq)]
enum IntKind {
Byte,
Short,
Int,
Long,
LongLong,
IntMax,
PtrDiff,
Size,
}
#[derive(PartialEq, Eq)]
enum CharKind {
Ascii,
Wide,
}
/// Helper function for progressing a C string
unsafe fn next_char(string: &mut *const wchar_t) -> Result<wint_t, c_int> {
let c = **string as wint_t;
*string = string.offset(1);
if c == 0 {
Err(-1)
} else {
Ok(c)
}
}
macro_rules! wc_as_char {
($c:ident) => {
char::try_from($c).map_err(|_| -1)?
};
}
unsafe fn inner_scanf(
mut r: LookAheadReader,
mut format: *const wchar_t,
mut ap: va_list,
) -> Result<c_int, c_int> {
let mut matched = 0;
let mut wchar = 0;
let mut skip_read = false;
let mut count = 0;
macro_rules! read {
() => {{
match r.lookahead1() {
Ok(None) => false,
Ok(Some(b)) => {
wchar = b;
count += 1;
true
}
Err(x) => return Err(x),
}
}};
}
macro_rules! maybe_read {
() => {
maybe_read!(inner false);
};
(noreset) => {
maybe_read!(inner);
};
(inner $($placeholder:expr)*) => {
if !skip_read && !read!() {
match matched {
0 => return Ok(-1),
a => return Ok(a),
}
}
$(else {
// Hacky way of having this optional
skip_read = $placeholder;
})*
}
}
while *format != 0 {
let mut c = next_char(&mut format)?;
if c as u8 == b' ' {
maybe_read!(noreset);
while (wc_as_char!(wchar)).is_whitespace() {
if !read!() {
return Ok(matched);
}
}
skip_read = true;
} else if c as u8 != b'%' {
maybe_read!();
if c != wchar {
return Ok(matched);
}
r.commit();
} else {
c = next_char(&mut format)?;
let mut ignore = false;
if c as u8 == b'*' {
ignore = true;
c = next_char(&mut format)?;
}
let mut width = String::new();
while c as u8 >= b'0' && c as u8 <= b'9' {
width.push(wc_as_char!(c));
c = next_char(&mut format)?;
}
let mut width = if width.is_empty() {
None
} else {
match width.parse::<usize>() {
Ok(n) => Some(n),
Err(_) => return Err(-1),
}
};
// When an EOF occurs, eof is set, stuff is marked matched
// as usual, and finally it is returned
let mut eof = false;
let mut kind = IntKind::Int;
let mut c_kind = CharKind::Ascii;
loop {
match c as u8 {
b'h' => {
if kind == IntKind::Short || kind == IntKind::Byte {
kind = IntKind::Byte;
} else {
kind = IntKind::Short;
}
}
b'j' => kind = IntKind::IntMax,
b'l' => {
if kind == IntKind::Long || kind == IntKind::LongLong {
kind = IntKind::LongLong;
} else {
kind = IntKind::Long;
}
}
b'q' | b'L' => kind = IntKind::LongLong,
b't' => kind = IntKind::PtrDiff,
b'z' => kind = IntKind::Size,
// If kind is Long, means we found a 'l' before finding 'c' or 's'. In this
// case the format corresponds to a wide char/string
b'c' | b's' if kind == IntKind::Long => {
c_kind = CharKind::Wide;
break;
}
_ => break,
}
c = next_char(&mut format)?;
}
if c as u8 != b'n' {
maybe_read!(noreset);
}
match c as u8 {
b'%' => {
while (wc_as_char!(wchar)).is_whitespace() {
if !read!() {
return Ok(matched);
}
}
if wchar as u8 != b'%' {
return Err(matched);
} else if !read!() {
return Ok(matched);
}
}
b'd' | b'i' | b'o' | b'u' | b'x' | b'X' | b'f' | b'e' | b'g' | b'E' | b'a'
| b'p' => {
while (wc_as_char!(wchar)).is_whitespace() {
if !read!() {
return Ok(matched);
}
}
let pointer = c as u8 == b'p';
// Pointers aren't automatic, but we do want to parse "0x"
let auto = c as u8 == b'i' || pointer;
let float = c as u8 == b'f'
|| c as u8 == b'e'
|| c as u8 == b'g'
|| c as u8 == b'E'
|| c as u8 == b'a';
let mut radix = match c as u8 {
b'o' => 8,
b'x' | b'X' | b'p' => 16,
_ => 10,
};
let mut n = String::new();
let mut dot = false;
while width.map(|w| w > 0).unwrap_or(true)
&& ((wchar as u8 >= b'0' && wchar as u8 <= b'7')
|| (radix >= 10 && (wchar as u8 >= b'8' && wchar as u8 <= b'9'))
|| (float && !dot && wchar as u8 == b'.')
|| (radix == 16
&& ((wchar as u8 >= b'a' && wchar as u8 <= b'f')
|| (wchar as u8 >= b'A' && wchar as u8 <= b'F'))))
{
if auto
&& n.is_empty()
&& wchar as u8 == b'0'
&& width.map(|w| w > 0).unwrap_or(true)
{
if !pointer {
radix = 8;
}
width = width.map(|w| w - 1);
if !read!() {
return Ok(matched);
}
if width.map(|w| w > 0).unwrap_or(true)
&& (wchar as u8 == b'x' || wchar as u8 == b'X')
{
radix = 16;
width = width.map(|w| w - 1);
if width.map(|w| w > 0).unwrap_or(true) && !read!() {
return Ok(matched);
}
}
continue;
}
if wchar as u8 == b'.' {
// Don't allow another dot
dot = true;
}
n.push(wc_as_char!(wchar));
r.commit();
width = width.map(|w| w - 1);
if width.map(|w| w > 0).unwrap_or(true) && !read!() {
break;
}
}
macro_rules! parse_type {
(noformat $type:ident) => {{
let n = if n.is_empty() {
0 as $type
} else {
n.parse::<$type>().map_err(|_| 0)?
};
if !ignore {
*ap.arg::<*mut $type>() = n;
matched += 1;
}
}};
(c_double) => {
parse_type!(noformat c_double)
};
(c_float) => {
parse_type!(noformat c_float)
};
($type:ident) => {
parse_type!($type, $type)
};
($type:ident, $final:ty) => {{
let n = if n.is_empty() {
0 as $type
} else {
$type::from_str_radix(&n, radix).map_err(|_| 0)?
};
if !ignore {
*ap.arg::<*mut $final>() = n as $final;
matched += 1;
}
}};
}
if float {
if kind == IntKind::Long || kind == IntKind::LongLong {
parse_type!(c_double);
} else {
parse_type!(c_float);
}
} else if c as u8 == b'p' {
parse_type!(size_t, *mut c_void);
} else {
let unsigned = c as u8 == b'o'
|| c as u8 == b'u'
|| c as u8 == b'x'
|| c as u8 == b'X';
match kind {
IntKind::Byte => {
if unsigned {
parse_type!(c_uchar);
} else {
parse_type!(c_char);
}
}
IntKind::Short => {
if unsigned {
parse_type!(c_ushort)
} else {
parse_type!(c_short)
}
}
IntKind::Int => {
if unsigned {
parse_type!(c_uint)
} else {
parse_type!(c_int)
}
}
IntKind::Long => {
if unsigned {
parse_type!(c_ulong)
} else {
parse_type!(c_long)
}
}
IntKind::LongLong => {
if unsigned {
parse_type!(c_ulonglong)
} else {
parse_type!(c_longlong)
}
}
IntKind::IntMax => {
if unsigned {
parse_type!(uintmax_t)
} else {
parse_type!(intmax_t)
}
}
IntKind::PtrDiff => parse_type!(ptrdiff_t),
IntKind::Size => {
if unsigned {
parse_type!(size_t)
} else {
parse_type!(ssize_t)
}
}
}
}
}
b's' => {
macro_rules! parse_string_type {
($type:ident) => {
while (wc_as_char!(wchar)).is_whitespace() {
if !read!() {
return Ok(matched);
}
}
let mut ptr: Option<*mut $type> =
if ignore { None } else { Some(ap.arg()) };
while width.map(|w| w > 0).unwrap_or(true)
&& !(wc_as_char!(wchar)).is_whitespace()
{
if let Some(ref mut ptr) = ptr {
**ptr = wchar as $type;
*ptr = ptr.offset(1);
}
width = width.map(|w| w - 1);
if width.map(|w| w > 0).unwrap_or(true) && !read!() {
eof = true;
break;
}
}
if let Some(ptr) = ptr {
*ptr = 0;
matched += 1;
r.commit();
}
};
}
if c_kind == CharKind::Ascii {
parse_string_type!(c_char);
} else {
parse_string_type!(wchar_t);
}
}
b'c' => {
macro_rules! parse_char_type {
($type:ident) => {
let ptr: Option<*mut $type> =
if ignore { None } else { Some(ap.arg()) };
for i in 0..width.unwrap_or(1) {
if let Some(ptr) = ptr {
*ptr.add(i) = wchar as $type;
}
width = width.map(|w| w - 1);
if width.map(|w| w > 0).unwrap_or(true) && !read!() {
eof = true;
break;
}
}
if ptr.is_some() {
matched += 1;
r.commit();
}
};
}
if c_kind == CharKind::Ascii {
parse_char_type!(c_char);
} else {
parse_char_type!(wchar_t);
}
}
b'[' => {
c = next_char(&mut format)?;
let mut matches = Vec::new();
let invert = if c as u8 == b'^' {
c = next_char(&mut format)?;
true
} else {
false
};
let mut prev;
loop {
matches.push(c);
prev = c;
c = next_char(&mut format)?;
if c as u8 == b'-' {
if prev as u8 == b']' {
continue;
}
c = next_char(&mut format)?;
if c as u8 == b']' {
matches.push('-' as wint_t);
break;
}
prev += 1;
while prev < c {
matches.push(prev);
prev += 1;
}
} else if c as u8 == b']' {
break;
}
}
let mut ptr: Option<*mut c_char> = if ignore { None } else { Some(ap.arg()) };
// While we haven't used up all the width, and it matches
let mut data_stored = false;
while width.map(|w| w > 0).unwrap_or(true)
&& !invert == matches.contains(&wchar)
{
if let Some(ref mut ptr) = ptr {
**ptr = wchar as c_char;
*ptr = ptr.offset(1);
data_stored = true;
}
r.commit();
// Decrease the width, and read a new character unless the width is 0
width = width.map(|w| w - 1);
if width.map(|w| w > 0).unwrap_or(true) && !read!() {
// Reading a new character has failed, return after
// actually marking this as matched
eof = true;
break;
}
}
if data_stored {
*ptr.unwrap() = 0;
matched += 1;
}
}
b'n' => {
if !ignore {
*ap.arg::<*mut c_int>() = count as c_int;
}
}
_ => return Err(-1),
}
if eof {
return Ok(matched);
}
if width != Some(0) && c as u8 != b'n' {
// It didn't hit the width, so an extra character was read and matched.
// But this character did not match so let's reuse it.
skip_read = true;
}
}
}
Ok(matched)
}
pub unsafe fn scanf(r: LookAheadReader, format: *const wchar_t, ap: va_list) -> c_int {
match inner_scanf(r, format, ap) {
Ok(n) => n,
Err(n) => n,
}
}
......@@ -119,6 +119,7 @@ EXPECT_NAMES=\
wchar/mbsrtowcs \
wchar/printf-on-wchars \
wchar/putwchar \
wchar/wscanf \
wchar/ungetwc \
wchar/wprintf \
wchar/wcrtomb \
......
File added
/* swscanf example */
#include <wchar.h>
#include <stdio.h>
#include "test_helpers.h"
struct params {
short sa;
int ia;
int ib;
int ic;
float fa;
double da;
int *ptr;
char c;
wchar_t wc;
char string1[20];
char string2[20];
char string3[20];
char string4[20];
wchar_t wstring1[20];
wchar_t wstring2[20];
wchar_t wstring3[20];
wchar_t wstring4[20];
};
void test(wchar_t* fmt_in, wchar_t* input, struct params *p, ...) {
va_list args;
va_start(args, p);
wint_t ret = vswscanf(input, fmt_in, args);
va_end(args);
wprintf(
L"%d, { sa: %hhd, ia: %d, ib: %d, ic: %d, fa: %f, da: %lf, ptr: %p, char: %c, wide char: %lc, string1: %s, string2: %s, string3: %s, string4: %s, wstring1: %ls, wstring2: %ls, wstring3: %ls, wstring4: %ls }\n",
ret, p->sa, p->ia, p->ib, p->ic, p->fa, p->da, p->ptr, p->c, p->wc, p->string1, p->string2, p->string3, p->string4, p->wstring1, p->wstring2, p->wstring3, p->wstring4
);
}
int main ()
{
struct params p = { .c = 'a' };
test(L"%hd %d", L"12 345", &p, &p.sa, &p.ia);
test(L"%x %i %i", L"12 0x345 010", &p, &p.ia, &p.ib, &p.ic);
test(L"%f.%lf", L"0.1.0.2", &p, &p.fa, &p.da);
test(L"%p", L"0xABCDEF", &p, &p.ptr);
test(L"%s", L"Hello World", &p, &p.string1);
test(L"%3i", L"0xFF", &p, &p.ia);
test(L"%c%3c", L"hello", &p, &p.c, &p.string1);
test(L"%lc", L"β", &p, &p.wc);
test(L"%lc %f", L"π 3.14", &p, &p.wc, &p.fa);
test(L"test: %2i%n", L"test: 0xFF", &p, &p.ia, &p.ib);
test(L"hello world%%", L"hello world%", &p);
test(L"h%1[ae]ll%1[^a] wor%1[^\n]%[d]", L"hello world", &p, &p.string1, &p.string2, &p.string3, &p.string4);
test(L"h%1[ae]ll%1[^a] wor%1[^\n]%[d]", L"halle worfdddddd", &p, &p.string1, &p.string2, &p.string3, &p.string4);
test(L"%[^a]%[b]", L"testbbbb", &p, &p.string1, &p.string2);
test(L"%ls %ls", L"Привет мир", &p, &p.wstring1, &p.wstring2);
test(L"%ls %ls", L"こんにちは 世界", &p, &p.wstring1, &p.wstring2);
test(L"%ls %d %ls %d", L"αβγ 123 δεζ 456", &p, &p.wstring1, &p.ia, &p.wstring2, &p.ib);
test(L"%ls %s %ls %s", L"αβγ test1 δεζ test2", &p, &p.wstring1, &p.string1, &p.wstring2, &p.string2);
test(L"%ls %ls %ls %ls", L"z ß 水 🍌", &p, &p.wstring1, &p.wstring2, &p.wstring3, &p.wstring4);
// Scanf stolen from the url parsing in curl
wchar_t protobuf[16];
wchar_t slashbuf[4];
wchar_t hostbuf[100];
wchar_t pathbuf[100];
// don't push NUL, make sure scanf does that
memset(protobuf, 97, 16);
memset(slashbuf, 97, 4);
memset(hostbuf, 97, 100);
memset(pathbuf, 97, 100);
int ret = swscanf(
L"https://redox-os.org\0# extra garbage for nul test", L"%15[^\n/:]:%3[/]%[^\n/?#]%[^\n]",
&protobuf, &slashbuf, &hostbuf, &pathbuf
);
if (ret < 4) {
*pathbuf = 0;
}
if (ret < 3) {
*hostbuf = 0;
}
if (ret < 2) {
*slashbuf = 0;
}
if (ret < 1) {
*protobuf = 0;
}
wprintf(L"%d \"%s\" \"%s\" \"%s\" \"%s\"\n", ret, &protobuf, &slashbuf, &hostbuf, &pathbuf);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment