diff --git a/src/header/wchar/lookaheadreader.rs b/src/header/wchar/lookaheadreader.rs index ddf36a63f5fe2c34f48d07d39e702e5862d8ca51..7f98c9b4c8d37e9fffa8566274ca65765ebc58ba 100644 --- a/src/header/wchar/lookaheadreader.rs +++ b/src/header/wchar/lookaheadreader.rs @@ -1,13 +1,24 @@ use super::{fseek_locked, ftell_locked, FILE, SEEK_SET}; use crate::{ + header::{ + errno::EILSEQ, + wchar::{fgetwc, get_char_encoded_length, mbrtowc, MB_CUR_MAX}, + wctype::WEOF, + }, io::Read, - platform::types::{off_t, wint_t}, + platform::{ + types::{c_char, off_t, wchar_t, wint_t}, + ERRNO, + }, }; +use core::ptr; + struct LookAheadBuffer { buf: *const wint_t, pos: isize, look_ahead: isize, } + impl LookAheadBuffer { fn look_ahead(&mut self) -> Result<Option<wint_t>, i32> { let wchar = unsafe { *self.buf.offset(self.look_ahead) }; @@ -34,20 +45,104 @@ impl From<*const wint_t> for LookAheadBuffer { } } -pub struct LookAheadReader(LookAheadBuffer); +struct LookAheadFile<'a> { + f: &'a mut FILE, + look_ahead: i64, +} -impl LookAheadReader { - pub fn lookahead1(&mut self) -> Result<Option<wint_t>, i32> { - self.0.look_ahead() +impl<'a> LookAheadFile<'a> { + fn look_ahead(&mut self) -> Result<Option<wint_t>, i32> { + let buf = &mut [0; MB_CUR_MAX as usize]; + let seek = unsafe { ftell_locked(self.f) }; + unsafe { fseek_locked(self.f, self.look_ahead as off_t, SEEK_SET) }; + + let mut encoded_length = 0; + let mut bytes_read = 0; + loop { + match self.f.read(&mut buf[bytes_read..bytes_read + 1]) { + Ok(0) => { + ERRNO.set(EILSEQ); + return Ok(Some(WEOF)); + } + Ok(_) => {} + Err(_) => return Err(-1), + } + + bytes_read += 1; + + if bytes_read == 1 { + encoded_length = if let Some(el) = get_char_encoded_length(buf[0]) { + el + } else { + ERRNO.set(EILSEQ); + return Ok(Some(WEOF)); + }; + } + + if bytes_read >= encoded_length { + break; + } + } + + let mut wc: wchar_t = 0; + unsafe { + mbrtowc( + &mut wc, + buf.as_ptr() as *const c_char, + encoded_length, + ptr::null_mut(), + ); + + fseek_locked(self.f, seek, SEEK_SET); + } + + self.look_ahead += encoded_length as i64; + + Ok(Some(wc as wint_t)) + } + + fn commit(&mut self) { + unsafe { fseek_locked(self.f, self.look_ahead as off_t, SEEK_SET) }; + } +} + +impl<'a> From<&'a mut FILE> for LookAheadFile<'a> { + fn from(f: &'a mut FILE) -> LookAheadFile<'a> { + let look_ahead = unsafe { ftell_locked(f) } as i64; + LookAheadFile { f, look_ahead } } +} + +enum LookAheadReaderEnum<'a> { + FILE(LookAheadFile<'a>), + BUFFER(LookAheadBuffer), +} +pub struct LookAheadReader<'a>(LookAheadReaderEnum<'a>); + +impl LookAheadReader<'_> { + pub fn lookahead1(&mut self) -> Result<Option<wint_t>, i32> { + match &mut self.0 { + LookAheadReaderEnum::FILE(f) => f.look_ahead(), + LookAheadReaderEnum::BUFFER(b) => b.look_ahead(), + } + } pub fn commit(&mut self) { - self.0.commit() + match &mut self.0 { + LookAheadReaderEnum::FILE(f) => f.commit(), + LookAheadReaderEnum::BUFFER(b) => b.commit(), + } + } +} + +impl<'a> From<&'a mut FILE> for LookAheadReader<'a> { + fn from(f: &'a mut FILE) -> LookAheadReader<'a> { + LookAheadReader(LookAheadReaderEnum::FILE(f.into())) } } -impl From<*const wint_t> for LookAheadReader { - fn from(buff: *const wint_t) -> LookAheadReader { - LookAheadReader(buff.into()) +impl<'a> From<*const wint_t> for LookAheadReader<'a> { + fn from(buff: *const wint_t) -> LookAheadReader<'a> { + LookAheadReader(LookAheadReaderEnum::BUFFER(buff.into())) } } diff --git a/src/header/wchar/mod.rs b/src/header/wchar/mod.rs index ff3102aa254a146b1564f4c4636b567cf22545ca..da86794849062eff1419bdb760b3100aa78fe6b5 100644 --- a/src/header/wchar/mod.rs +++ b/src/header/wchar/mod.rs @@ -10,6 +10,7 @@ use crate::{ stdlib::{malloc, MB_CUR_MAX, MB_LEN_MAX}, string, time::*, + wchar::{lookaheadreader::LookAheadReader, utf8::get_char_encoded_length}, wctype::*, }, iter::{NulTerminated, NulTerminatedInclusive}, @@ -69,14 +70,8 @@ pub unsafe extern "C" fn fgetwc(stream: *mut FILE) -> wint_t { bytes_read += 1; if bytes_read == 1 { - encoded_length = if buf[0] >> 7 == 0 { - 1 - } else if buf[0] >> 5 == 6 { - 2 - } else if buf[0] >> 4 == 0xe { - 3 - } else if buf[0] >> 3 == 0x1e { - 4 + encoded_length = if let Some(el) = get_char_encoded_length(buf[0]) { + el } else { ERRNO.set(EILSEQ); return WEOF; @@ -1006,9 +1001,21 @@ pub unsafe extern "C" fn wmemset(ws: *mut wchar_t, wc: wchar_t, n: size_t) -> *m ws } -// #[no_mangle] -pub extern "C" fn wscanf(format: *const wchar_t, ap: va_list) -> c_int { - unimplemented!(); +#[no_mangle] +pub unsafe extern "C" fn vwscanf(format: *const wchar_t, __valist: va_list) -> c_int { + let mut file = (*stdin).lock(); + if let Err(_) = file.try_set_byte_orientation_unlocked() { + return -1; + } + + let f: &mut FILE = &mut *file; + let reader: LookAheadReader = f.into(); + wscanf::scanf(reader, format, __valist) +} + +#[no_mangle] +pub unsafe extern "C" fn wscanf(format: *const wchar_t, mut __valist: ...) -> c_int { + vwscanf(format, __valist.as_va_list()) } #[no_mangle] diff --git a/src/header/wchar/utf8.rs b/src/header/wchar/utf8.rs index b1961c1ac85f73568fd2811c618aeef30cc9c015..bb127427e012e366e28474d20bc1922d40437ccb 100644 --- a/src/header/wchar/utf8.rs +++ b/src/header/wchar/utf8.rs @@ -91,3 +91,18 @@ pub unsafe fn wcrtomb(s: *mut c_char, wc: wchar_t, ps: *mut mbstate_t) -> usize size } + +/// Gets the encoded length of a character. It is used to recognize wide characters +pub fn get_char_encoded_length(first_byte: u8) -> Option<usize> { + if first_byte >> 7 == 0 { + Some(1) + } else if first_byte >> 5 == 6 { + Some(2) + } else if first_byte >> 4 == 0xe { + Some(3) + } else if first_byte >> 3 == 0x1e { + Some(4) + } else { + None + } +}