diff --git a/src/header/time/mod.rs b/src/header/time/mod.rs index 0abe45a6800d4c89232a2b07760a4d557b428c91..5de0898acf0283bd414376aa04d82fda9b0e72f1 100644 --- a/src/header/time/mod.rs +++ b/src/header/time/mod.rs @@ -22,7 +22,9 @@ use core::{ pub use self::constants::*; pub mod constants; + mod strftime; +mod strptime; const YEARS_PER_ERA: time_t = 400; const DAYS_PER_ERA: time_t = 146097; @@ -465,11 +467,6 @@ pub unsafe extern "C" fn strftime( } } -// #[no_mangle] -pub extern "C" fn strptime(buf: *const c_char, format: *const c_char, tm: *mut tm) -> *mut c_char { - unimplemented!(); -} - #[no_mangle] pub unsafe extern "C" fn time(tloc: *mut time_t) -> time_t { let mut ts = timespec::default(); diff --git a/src/header/time/strptime.rs b/src/header/time/strptime.rs new file mode 100644 index 0000000000000000000000000000000000000000..5ffaf94b2ee4f189d327dfbe48d5ed44e2c4cd4c --- /dev/null +++ b/src/header/time/strptime.rs @@ -0,0 +1,533 @@ +// https://pubs.opengroup.org/onlinepubs/7908799/xsh/strptime.html + +use alloc::{string::String, vec::Vec}; +use core::{ + ffi::{c_char, c_int}, + mem::MaybeUninit, + ptr, slice, str, +}; + +use crate::{header::time::tm, platform::types::size_t}; + +/// For convenience, we define some helper constants for the C-locale. +static SHORT_DAYS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"]; +static LONG_DAYS: [&str; 7] = [ + "Sunday", + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", +]; +static SHORT_MONTHS: [&str; 12] = [ + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", +]; +static LONG_MONTHS: [&str; 12] = [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", +]; + +// Macro for matching a character ignoring ASCII case +macro_rules! eq_icase { + ($c1:expr, $c2:expr) => { + $c1.eq_ignore_ascii_case(&$c2) + }; +} + +#[no_mangle] +pub extern "C" fn strptime(buf: *const c_char, format: *const c_char, tm: *mut tm) -> *mut c_char { + unsafe { + if buf.is_null() || format.is_null() || tm.is_null() { + return ptr::null_mut(); + } + + // Convert raw pointers into Rust slices/strings. + let mut input_str = cstr_to_str(buf); + let fmt_str = cstr_to_str(format); + + // Zero-initialize the output `tm` structure + // (equivalent to: tm_sec=0, tm_min=0, tm_hour=0, etc.) + ptr::write_bytes(tm, 0, 1); + + // We parse the format specifiers in a loop + let mut fmt_chars = fmt_str.chars().peekable(); + let mut index_in_input = 0; + + while let Some(fc) = fmt_chars.next() { + if fc != '%' { + // If it's a normal character, we expect it to match exactly in input + if input_str.len() <= index_in_input { + return ptr::null_mut(); // input ended too soon + } + let in_char = input_str.as_bytes()[index_in_input] as char; + if in_char != fc { + // mismatch + return ptr::null_mut(); + } + index_in_input += 1; + continue; + } + + // If we see '%', read the next character (format specifier) + let Some(spec) = fmt_chars.next() else { + // format string ended abruptly after '%' + return ptr::null_mut(); + }; + + // POSIX says `%E` or `%O` are "modified" specifiers for locale. + // We'll skip them if they appear (like strftime does) and read the next char. + let final_spec = if spec == 'E' || spec == 'O' { + match fmt_chars.next() { + Some(ch) => ch, + None => return ptr::null_mut(), + } + } else { + spec + }; + + // Handle known specifiers + match final_spec { + /////////////////////////// + // Whitespace: %n or %t // + /////////////////////////// + 'n' | 't' => { + // Skip over any whitespace in the input + while index_in_input < input_str.len() + && input_str.as_bytes()[index_in_input].is_ascii_whitespace() + { + index_in_input += 1; + } + } + + /////////////////////////// + // Literal % => "%%" // + /////////////////////////// + '%' => { + if index_in_input >= input_str.len() + || input_str.as_bytes()[index_in_input] as char != '%' + { + return ptr::null_mut(); + } + index_in_input += 1; + } + + /////////////////////////// + // Day of Month: %d / %e // + /////////////////////////// + 'd' | 'e' => { + // parse a 2-digit day (with or without leading zero) + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + (*tm).tm_mday = val as c_int; + index_in_input += len; + } + + /////////////////////////// + // Month: %m // + /////////////////////////// + 'm' => { + // parse a 2-digit month + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + // tm_mon is 0-based (0 = Jan, 1 = Feb,...) + (*tm).tm_mon = (val as c_int) - 1; + if (*tm).tm_mon < 0 || (*tm).tm_mon > 11 { + return ptr::null_mut(); + } + index_in_input += len; + } + + /////////////////////////// + // Year without century: %y + /////////////////////////// + 'y' => { + // parse a 2-digit year + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + // According to POSIX, %y in strptime is [00,99], and the "year" is 1900..1999 for [00..99], + // but the standard says: "values in [69..99] refer to 1969..1999, [00..68] => 2000..2068" + let fullyear = if val >= 69 { val + 1900 } else { val + 2000 }; + (*tm).tm_year = (fullyear - 1900) as c_int; + index_in_input += len; + } + + /////////////////////////// + // Year with century: %Y + /////////////////////////// + 'Y' => { + // parse up to 4-digit (or more) year + // We allow more than 4 digits if needed + let (val, len) = match parse_int(&input_str[index_in_input..], 4, true) { + Some(v) => v, + None => return ptr::null_mut(), + }; + (*tm).tm_year = (val as c_int) - 1900; + index_in_input += len; + } + + /////////////////////////// + // Hour (00..23): %H // + /////////////////////////// + 'H' => { + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + if val > 23 { + return ptr::null_mut(); + } + (*tm).tm_hour = val as c_int; + index_in_input += len; + } + + /////////////////////////// + // Hour (01..12): %I // + /////////////////////////// + 'I' => { + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + if val < 1 || val > 12 { + return ptr::null_mut(); + } + (*tm).tm_hour = val as c_int; + // We’ll interpret AM/PM with %p if it appears later + index_in_input += len; + } + + /////////////////////////// + // Minute (00..59): %M // + /////////////////////////// + 'M' => { + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + if val > 59 { + return ptr::null_mut(); + } + (*tm).tm_min = val as c_int; + index_in_input += len; + } + + /////////////////////////// + // Seconds (00..60): %S // + /////////////////////////// + 'S' => { + let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + if val > 60 { + return ptr::null_mut(); + } + (*tm).tm_sec = val as c_int; + index_in_input += len; + } + + /////////////////////////// + // AM/PM: %p // + /////////////////////////// + 'p' => { + // Parse either "AM" or "PM" (case-insensitive) + // We'll read up to 2 or 3 letters from input ("AM", "PM") + let leftover = &input_str[index_in_input..]; + let parsed_len = match parse_am_pm(leftover) { + Some((is_pm, used)) => { + if (*tm).tm_hour == 12 { + // 12 AM => 00:xx, 12 PM => 12:xx + (*tm).tm_hour = if is_pm { 12 } else { 0 }; + } else { + // 1..11 AM => 1..11, 1..11 PM => 13..23 + if is_pm { + (*tm).tm_hour += 12; + } + } + used + } + None => return ptr::null_mut(), + }; + index_in_input += parsed_len; + } + + /////////////////////////// + // Weekday Name: %a/%A // + /////////////////////////// + 'a' => { + // Abbreviated day name (Sun..Sat) + let leftover = &input_str[index_in_input..]; + let parsed_len = match parse_weekday(leftover, true) { + Some((wday, used)) => { + (*tm).tm_wday = wday as c_int; + used + } + None => return ptr::null_mut(), + }; + index_in_input += parsed_len; + } + 'A' => { + // Full day name (Sunday..Saturday) + let leftover = &input_str[index_in_input..]; + let parsed_len = match parse_weekday(leftover, false) { + Some((wday, used)) => { + (*tm).tm_wday = wday as c_int; + used + } + None => return ptr::null_mut(), + }; + index_in_input += parsed_len; + } + + /////////////////////////// + // Month Name: %b/%B/%h // + /////////////////////////// + 'b' | 'h' => { + // Abbreviated month name + let leftover = &input_str[index_in_input..]; + let parsed_len = match parse_month(leftover, true) { + Some((mon, used)) => { + (*tm).tm_mon = mon as c_int; + used + } + None => return ptr::null_mut(), + }; + index_in_input += parsed_len; + } + 'B' => { + // Full month name + let leftover = &input_str[index_in_input..]; + let parsed_len = match parse_month(leftover, false) { + Some((mon, used)) => { + (*tm).tm_mon = mon as c_int; + used + } + None => return ptr::null_mut(), + }; + index_in_input += parsed_len; + } + + /////////////////////////// + // Day of year: %j // + /////////////////////////// + 'j' => { + // parse 3-digit day of year [001..366] + let (val, len) = match parse_int(&input_str[index_in_input..], 3, false) { + Some(v) => v, + None => return ptr::null_mut(), + }; + if val < 1 || val > 366 { + return ptr::null_mut(); + } + // store in tm_yday + (*tm).tm_yday = (val - 1) as c_int; + index_in_input += len; + } + + /////////////////////////// + // Date shortcuts: %D, %F, etc. + /////////////////////////// + 'D' => { + // Equivalent to "%m/%d/%y" + // We can do a mini strptime recursion or manually parse + // For simplicity, we'll do it inline here + let subfmt = "%m/%d/%y"; + let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) { + Some(v) => v, + None => return ptr::null_mut(), + }; + index_in_input += used; + } + 'F' => { + // Equivalent to "%Y-%m-%d" + let subfmt = "%Y-%m-%d"; + let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) { + Some(v) => v, + None => return ptr::null_mut(), + }; + index_in_input += used; + } + + /////////////////////////// + // Not implemented: %x, %X, %c, %r, %R, %T, etc. + /////////////////////////// + // If you want to implement these, do similarly to %D / %F or parse manually + 'x' | 'X' | 'c' | 'r' | 'R' | 'T' => { + // For brevity, we skip these. You can expand similarly. + // Return NULL if we don’t want to accept them: + return ptr::null_mut(); + } + + /////////////////////////// + // Timezone: %Z or %z + /////////////////////////// + 'Z' | 'z' => { + // Full/abbrev time zone name or numeric offset + // Implementation omitted. Real support is quite complicated. + return ptr::null_mut(); + } + + ////////// + // else // + ////////// + _ => { + // We do not recognize this specifier + return ptr::null_mut(); + } + } + } + + // If we got here, parsing was successful. Return pointer to the + // next unparsed character in `buf`. + let ret_ptr = buf.add(index_in_input); + ret_ptr as *mut c_char + } +} + +// ----------------------- +// Helper / Parsing Logic +// ----------------------- + +/// Convert a C char pointer to a Rust &str (assuming it's valid UTF-8). +/// Returns an empty string if invalid. +unsafe fn cstr_to_str<'a>(ptr: *const c_char) -> &'a str { + if ptr.is_null() { + return ""; + } + let len = strlen(ptr); + let bytes = slice::from_raw_parts(ptr as *const u8, len); + str::from_utf8(bytes).unwrap_or("") +} + +/// Minimal strlen for C-strings +unsafe fn strlen(mut ptr: *const c_char) -> usize { + let mut count = 0; + while !ptr.is_null() && *ptr != 0 { + ptr = ptr.add(1); + count += 1; + } + count +} + +/// Parse an integer from the beginning of `input_str`. +/// +/// - `width` is the maximum number of digits to parse +/// - `allow_variable_width` indicates if we can parse fewer digits +/// (e.g., `%Y` can have more than 4 digits, but also might parse "2023" or "12345"). +fn parse_int(input_str: &str, width: usize, allow_variable_width: bool) -> Option<(i32, usize)> { + let mut count = 0; + let mut value: i32 = 0; + let chars: Vec<char> = input_str.chars().collect(); + + for c in chars.iter() { + if !c.is_ascii_digit() { + break; + } + value = value * 10 + (*c as u8 as i32 - '0' as i32); + count += 1; + if count == width && !allow_variable_width { + break; + } + } + if count == 0 { + return None; // no digits found + } + Some((value, count)) +} + +/// Handle AM/PM. Returns (is_pm, length_consumed). +/// Accepts "AM", "am", "PM", "pm" case-insensitively. +fn parse_am_pm(s: &str) -> Option<(bool, usize)> { + // Trim leading whitespace? + // For simplicity, we do not. If needed, handle it. + let s_up = s.to_ascii_uppercase(); + if s_up.starts_with("AM") { + return Some((false, 2)); + } + if s_up.starts_with("PM") { + return Some((true, 2)); + } + None +} + +/// Parse a weekday name from `s`. +/// - If `abbrev == true`, match short forms: "Sun".."Sat" +/// - Otherwise, match "Sunday".."Saturday" +/// Return (weekday_index, length_consumed). +fn parse_weekday(s: &str, abbrev: bool) -> Option<(usize, usize)> { + let list = if abbrev { &SHORT_DAYS } else { &LONG_DAYS }; + for (i, name) in list.iter().enumerate() { + if s.len() >= name.len() && s[0..name.len()].eq_ignore_ascii_case(name) { + return Some((i, name.len())); + } + } + None +} + +/// Parse a month name from `s`. +/// - If `abbrev == true`, match short forms: "Jan".."Dec" +/// - Otherwise, match "January".."December" +/// Return (month_index, length_consumed). +fn parse_month(s: &str, abbrev: bool) -> Option<(usize, usize)> { + let list = if abbrev { &SHORT_MONTHS } else { &LONG_MONTHS }; + for (i, name) in list.iter().enumerate() { + if s.len() >= name.len() && s[0..name.len()].eq_ignore_ascii_case(name) { + return Some((i, name.len())); + } + } + None +} + +/// Apply a small subformat (like "%m/%d/%y" or "%Y-%m-%d") to `input`. +/// Return how many characters of `input` were consumed or None on error. +unsafe fn apply_subformat(input: &str, subfmt: &str, tm: *mut tm) -> Option<usize> { + // We'll do a temporary strptime call on a substring. + // Then we see how many chars it consumed. + // If that call fails, we return None. + // Otherwise, we return the count. + + // Convert `input` to a null-terminated buffer temporarily + let mut tmpbuf = String::with_capacity(input.len() + 1); + tmpbuf.push_str(input); + tmpbuf.push('\0'); + + let mut tmpfmt = String::with_capacity(subfmt.len() + 1); + tmpfmt.push_str(subfmt); + tmpfmt.push('\0'); + + // We need a copy of the tm, so if partial parse fails, we don't override. + let old_tm = ptr::read(tm); // backup + + let consumed_ptr = strptime( + tmpbuf.as_ptr() as *const c_char, + tmpfmt.as_ptr() as *const c_char, + tm, + ); + + if consumed_ptr.is_null() { + // revert + *tm = old_tm; + return None; + } + + // consumed_ptr - tmpbuf.as_ptr() => # of bytes consumed + let diff = (consumed_ptr as usize) - (tmpbuf.as_ptr() as usize); + Some(diff) +}