Skip to content
Snippets Groups Projects
Commit cf106906 authored by Guillaume Gielly's avatar Guillaume Gielly
Browse files

Code cleanup

parent ffd5edd2
No related branches found
No related tags found
1 merge request!599Implement 'langinfo.h' and refactor strftime() to use langinfo constants
{
"cmake.ignoreCMakeListsMissing": true
}
\ No newline at end of file
// https://pubs.opengroup.org/onlinepubs/7908799/xsh/strptime.html
use crate::{
header::{string::strlen, time::tm},
platform::types::size_t,
};
use alloc::{string::String, vec::Vec};
use core::{
ffi::{c_char, c_int},
ffi::{c_char, c_int, c_void, CStr},
mem::MaybeUninit,
ptr, slice, str,
ptr,
ptr::NonNull,
slice, str,
};
use crate::{header::time::tm, platform::types::size_t};
/// For convenience, we define some helper constants for the C-locale.
static SHORT_DAYS: [&str; 7] = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
static LONG_DAYS: [&str; 7] = [
......@@ -38,418 +42,428 @@ static LONG_MONTHS: [&str; 12] = [
"December",
];
// Macro for matching a character ignoring ASCII case
macro_rules! eq_icase {
($c1:expr, $c2:expr) => {
$c1.eq_ignore_ascii_case(&$c2)
#[no_mangle]
pub unsafe extern "C" fn strptime(
buf: *const c_char,
format: *const c_char,
tm: *mut tm,
) -> *mut c_char {
// Validate inputs
let buf_ptr = if let Some(ptr) = NonNull::new(buf as *const c_void as *mut c_void) {
ptr
} else {
return ptr::null_mut();
};
//
let fmt_ptr = if let Some(ptr) = NonNull::new(format as *const c_void as *mut c_void) {
ptr
} else {
return ptr::null_mut();
};
}
#[no_mangle]
pub extern "C" fn strptime(buf: *const c_char, format: *const c_char, tm: *mut tm) -> *mut c_char {
unsafe {
if buf.is_null() || format.is_null() || tm.is_null() {
let tm_ptr = if let Some(ptr) = NonNull::new(tm) {
ptr
} else {
return ptr::null_mut();
};
// Convert raw pointers into slices/strings.
let input_str = unsafe {
if buf.is_null() {
return ptr::null_mut();
}
match CStr::from_ptr(buf).to_str() {
Ok(s) => s,
Err(_) => return ptr::null_mut(), // Not a valid UTF-8
}
};
let fmt_str = unsafe {
if format.is_null() {
return ptr::null_mut();
}
match CStr::from_ptr(format).to_str() {
Ok(s) => s,
Err(_) => return ptr::null_mut(), // Not a valid UTF-8
}
};
// Convert raw pointers into Rust slices/strings.
let mut input_str = cstr_to_str(buf);
let fmt_str = cstr_to_str(format);
// Zero-initialize the output `tm` structure
// (equivalent to: tm_sec=0, tm_min=0, tm_hour=0...)
ptr::write_bytes(tm, 0, 1);
// Zero-initialize the output `tm` structure
// (equivalent to: tm_sec=0, tm_min=0, tm_hour=0, etc.)
ptr::write_bytes(tm, 0, 1);
// We parse the format specifiers in a loop
let mut fmt_chars = fmt_str.chars().peekable();
let mut index_in_input = 0;
// We parse the format specifiers in a loop
let mut fmt_chars = fmt_str.chars().peekable();
let mut index_in_input = 0;
while let Some(fc) = fmt_chars.next() {
if fc != '%' {
// If it's a normal character, we expect it to match exactly in input
if input_str.len() <= index_in_input {
return ptr::null_mut(); // input ended too soon
}
let in_char = input_str.as_bytes()[index_in_input] as char;
if in_char != fc {
// mismatch
return ptr::null_mut();
}
index_in_input += 1;
continue;
}
while let Some(fc) = fmt_chars.next() {
if fc != '%' {
// If it's a normal character, we expect it to match exactly in input
if input_str.len() <= index_in_input {
return ptr::null_mut(); // input ended too soon
// If we see '%', read the next character
let Some(spec) = fmt_chars.next() else {
// format string ended abruptly after '%'
return ptr::null_mut();
};
// POSIX says `%E` or `%O` are modified specifiers for locale.
// We will skip them if they appear (like strftime does) and read the next char.
let final_spec = if spec == 'E' || spec == 'O' {
match fmt_chars.next() {
Some(ch) => ch,
None => return ptr::null_mut(),
}
} else {
spec
};
// Handle known specifiers
match final_spec {
///////////////////////////
// Whitespace: %n or %t //
///////////////////////////
'n' | 't' => {
// Skip over any whitespace in the input
while index_in_input < input_str.len()
&& input_str.as_bytes()[index_in_input].is_ascii_whitespace()
{
index_in_input += 1;
}
let in_char = input_str.as_bytes()[index_in_input] as char;
if in_char != fc {
// mismatch
}
///////////////////////////
// Literal % => "%%" //
///////////////////////////
'%' => {
if index_in_input >= input_str.len()
|| input_str.as_bytes()[index_in_input] as char != '%'
{
return ptr::null_mut();
}
index_in_input += 1;
continue;
}
// If we see '%', read the next character (format specifier)
let Some(spec) = fmt_chars.next() else {
// format string ended abruptly after '%'
return ptr::null_mut();
};
///////////////////////////
// Day of Month: %d / %e //
///////////////////////////
'd' | 'e' => {
// parse a 2-digit day (with or without leading zero)
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
(*tm).tm_mday = val as c_int;
index_in_input += len;
}
// POSIX says `%E` or `%O` are "modified" specifiers for locale.
// We'll skip them if they appear (like strftime does) and read the next char.
let final_spec = if spec == 'E' || spec == 'O' {
match fmt_chars.next() {
Some(ch) => ch,
///////////////////////////
// Month: %m //
///////////////////////////
'm' => {
// parse a 2-digit month
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
// tm_mon is 0-based (0 = Jan, 1 = Feb,...)
(*tm).tm_mon = (val as c_int) - 1;
if (*tm).tm_mon < 0 || (*tm).tm_mon > 11 {
return ptr::null_mut();
}
} else {
spec
};
// Handle known specifiers
match final_spec {
///////////////////////////
// Whitespace: %n or %t //
///////////////////////////
'n' | 't' => {
// Skip over any whitespace in the input
while index_in_input < input_str.len()
&& input_str.as_bytes()[index_in_input].is_ascii_whitespace()
{
index_in_input += 1;
}
}
index_in_input += len;
}
///////////////////////////
// Literal % => "%%" //
///////////////////////////
'%' => {
if index_in_input >= input_str.len()
|| input_str.as_bytes()[index_in_input] as char != '%'
{
return ptr::null_mut();
}
index_in_input += 1;
}
//////////////////////////////
// Year without century: %y //
//////////////////////////////
'y' => {
// parse a 2-digit year
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
// `%y` accepts a two-digit year in [00,99]. Per POSIX, values in [69,99] refer to
// 1969..1999 and values in [00,68] refer to 2000..2068.
let fullyear = if val >= 69 { val + 1900 } else { val + 2000 };
(*tm).tm_year = (fullyear - 1900) as c_int;
index_in_input += len;
}
///////////////////////////
// Day of Month: %d / %e //
///////////////////////////
'd' | 'e' => {
// parse a 2-digit day (with or without leading zero)
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
(*tm).tm_mday = val as c_int;
index_in_input += len;
}
///////////////////////////
// Year with century: %Y //
///////////////////////////
'Y' => {
// parse up to 4-digit (or more) year
// We allow more than 4 digits if needed
let (val, len) = match parse_int(&input_str[index_in_input..], 4, true) {
Some(v) => v,
None => return ptr::null_mut(),
};
(*tm).tm_year = (val as c_int) - 1900;
index_in_input += len;
}
///////////////////////////
// Month: %m //
///////////////////////////
'm' => {
// parse a 2-digit month
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
// tm_mon is 0-based (0 = Jan, 1 = Feb,...)
(*tm).tm_mon = (val as c_int) - 1;
if (*tm).tm_mon < 0 || (*tm).tm_mon > 11 {
return ptr::null_mut();
}
index_in_input += len;
///////////////////////////
// Hour (00..23): %H //
///////////////////////////
'H' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 23 {
return ptr::null_mut();
}
(*tm).tm_hour = val as c_int;
index_in_input += len;
}
///////////////////////////
// Year without century: %y
///////////////////////////
'y' => {
// parse a 2-digit year
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
// `%y` accepts a two-digit year in [00,99]. Per POSIX, values in [69,99] refer to
// 1969..1999 and values in [00,68] refer to 2000..2068.
let fullyear = if val >= 69 { val + 1900 } else { val + 2000 };
(*tm).tm_year = (fullyear - 1900) as c_int;
index_in_input += len;
///////////////////////////
// Hour (01..12): %I //
///////////////////////////
'I' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val < 1 || val > 12 {
return ptr::null_mut();
}
(*tm).tm_hour = val as c_int;
// We’ll interpret AM/PM with %p if it appears later
index_in_input += len;
}
///////////////////////////
// Year with century: %Y
///////////////////////////
'Y' => {
// parse up to 4-digit (or more) year
// We allow more than 4 digits if needed
let (val, len) = match parse_int(&input_str[index_in_input..], 4, true) {
Some(v) => v,
None => return ptr::null_mut(),
};
(*tm).tm_year = (val as c_int) - 1900;
index_in_input += len;
///////////////////////////
// Minute (00..59): %M //
///////////////////////////
'M' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 59 {
return ptr::null_mut();
}
(*tm).tm_min = val as c_int;
index_in_input += len;
}
///////////////////////////
// Hour (00..23): %H //
///////////////////////////
'H' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 23 {
return ptr::null_mut();
}
(*tm).tm_hour = val as c_int;
index_in_input += len;
///////////////////////////
// Seconds (00..60): %S //
///////////////////////////
'S' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 60 {
return ptr::null_mut();
}
(*tm).tm_sec = val as c_int;
index_in_input += len;
}
///////////////////////////
// Hour (01..12): %I //
///////////////////////////
'I' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val < 1 || val > 12 {
return ptr::null_mut();
///////////////////////////
// AM/PM: %p //
///////////////////////////
'p' => {
// Parse either "AM" or "PM" (case-insensitive)
// We'll read up to 2 or 3 letters from input ("AM", "PM")
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_am_pm(leftover) {
Some((is_pm, used)) => {
if (*tm).tm_hour == 12 {
// 12 AM => 00:xx, 12 PM => 12:xx
(*tm).tm_hour = if is_pm { 12 } else { 0 };
} else {
// 1..11 AM => 1..11, 1..11 PM => 13..23
if is_pm {
(*tm).tm_hour += 12;
}
}
used
}
(*tm).tm_hour = val as c_int;
// We’ll interpret AM/PM with %p if it appears later
index_in_input += len;
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
///////////////////////////
// Minute (00..59): %M //
///////////////////////////
'M' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 59 {
return ptr::null_mut();
///////////////////////////
// Weekday Name: %a/%A //
///////////////////////////
'a' => {
// Abbreviated day name (Sun..Sat)
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_weekday(leftover, true) {
Some((wday, used)) => {
(*tm).tm_wday = wday as c_int;
used
}
(*tm).tm_min = val as c_int;
index_in_input += len;
}
///////////////////////////
// Seconds (00..60): %S //
///////////////////////////
'S' => {
let (val, len) = match parse_int(&input_str[index_in_input..], 2, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val > 60 {
return ptr::null_mut();
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
'A' => {
// Full day name (Sunday..Saturday)
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_weekday(leftover, false) {
Some((wday, used)) => {
(*tm).tm_wday = wday as c_int;
used
}
(*tm).tm_sec = val as c_int;
index_in_input += len;
}
///////////////////////////
// AM/PM: %p //
///////////////////////////
'p' => {
// Parse either "AM" or "PM" (case-insensitive)
// We'll read up to 2 or 3 letters from input ("AM", "PM")
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_am_pm(leftover) {
Some((is_pm, used)) => {
if (*tm).tm_hour == 12 {
// 12 AM => 00:xx, 12 PM => 12:xx
(*tm).tm_hour = if is_pm { 12 } else { 0 };
} else {
// 1..11 AM => 1..11, 1..11 PM => 13..23
if is_pm {
(*tm).tm_hour += 12;
}
}
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
///////////////////////////
// Weekday Name: %a/%A //
///////////////////////////
'a' => {
// Abbreviated day name (Sun..Sat)
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_weekday(leftover, true) {
Some((wday, used)) => {
(*tm).tm_wday = wday as c_int;
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
'A' => {
// Full day name (Sunday..Saturday)
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_weekday(leftover, false) {
Some((wday, used)) => {
(*tm).tm_wday = wday as c_int;
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
///////////////////////////
// Month Name: %b/%B/%h //
///////////////////////////
'b' | 'h' => {
// Abbreviated month name
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_month(leftover, true) {
Some((mon, used)) => {
(*tm).tm_mon = mon as c_int;
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
'B' => {
// Full month name
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_month(leftover, false) {
Some((mon, used)) => {
(*tm).tm_mon = mon as c_int;
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
///////////////////////////
// Day of year: %j //
///////////////////////////
'j' => {
// parse 3-digit day of year [001..366]
let (val, len) = match parse_int(&input_str[index_in_input..], 3, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val < 1 || val > 366 {
return ptr::null_mut();
///////////////////////////
// Month Name: %b/%B/%h //
///////////////////////////
'b' | 'h' => {
// Abbreviated month name
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_month(leftover, true) {
Some((mon, used)) => {
(*tm).tm_mon = mon as c_int;
used
}
// store in tm_yday
(*tm).tm_yday = (val - 1) as c_int;
index_in_input += len;
}
///////////////////////////
// Date shortcuts: %D, %F, etc.
///////////////////////////
'D' => {
// Equivalent to "%m/%d/%y"
// We can do a mini strptime recursion or manually parse
// For simplicity, we'll do it inline here
let subfmt = "%m/%d/%y";
let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) {
Some(v) => v,
None => return ptr::null_mut(),
};
index_in_input += used;
}
'F' => {
// Equivalent to "%Y-%m-%d"
let subfmt = "%Y-%m-%d";
let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) {
Some(v) => v,
None => return ptr::null_mut(),
};
index_in_input += used;
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
'B' => {
// Full month name
let leftover = &input_str[index_in_input..];
let parsed_len = match parse_month(leftover, false) {
Some((mon, used)) => {
(*tm).tm_mon = mon as c_int;
used
}
None => return ptr::null_mut(),
};
index_in_input += parsed_len;
}
///////////////////////////
// Not implemented: %x, %X, %c, %r, %R, %T, etc.
///////////////////////////
// If you want to implement these, do similarly to %D / %F or parse manually
'x' | 'X' | 'c' | 'r' | 'R' | 'T' => {
// For brevity, we skip these. You can expand similarly.
// Return NULL if we don’t want to accept them:
///////////////////////////
// Day of year: %j //
///////////////////////////
'j' => {
// parse 3-digit day of year [001..366]
let (val, len) = match parse_int(&input_str[index_in_input..], 3, false) {
Some(v) => v,
None => return ptr::null_mut(),
};
if val < 1 || val > 366 {
return ptr::null_mut();
}
// store in tm_yday
(*tm).tm_yday = (val - 1) as c_int;
index_in_input += len;
}
///////////////////////////
// Timezone: %Z or %z
///////////////////////////
'Z' | 'z' => {
// Full/abbrev time zone name or numeric offset
// Implementation omitted. Real support is quite complicated.
return ptr::null_mut();
}
//////////////////////////////////
// Date shortcuts: %D, %F, etc. //
//////////////////////////////////
'D' => {
// Equivalent to "%m/%d/%y"
// We can do a mini strptime recursion or manually parse
// For simplicity, we'll do it inline here
let subfmt = "%m/%d/%y";
let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) {
Some(v) => v,
None => return ptr::null_mut(),
};
index_in_input += used;
}
'F' => {
// Equivalent to "%Y-%m-%d"
let subfmt = "%Y-%m-%d";
let used = match apply_subformat(&input_str[index_in_input..], subfmt, tm) {
Some(v) => v,
None => return ptr::null_mut(),
};
index_in_input += used;
}
//////////
// else //
//////////
_ => {
// We do not recognize this specifier
return ptr::null_mut();
}
//////////////////////////////////////////////////////////
// TODO : not implemented: %x, %X, %c, %r, %R, %T, etc. //
//////////////////////////////////////////////////////////
// Hint : if you want to implement these, do similarly to %D / %F (or parse manually)
'x' | 'X' | 'c' | 'r' | 'R' | 'T' => {
// Return NULL if we don’t want to accept them :
return ptr::null_mut();
}
}
// If we got here, parsing was successful. Return pointer to the
// next unparsed character in `buf`.
let ret_ptr = buf.add(index_in_input);
ret_ptr as *mut c_char
///////////////////////////
// Timezone: %Z or %z //
///////////////////////////
'Z' | 'z' => {
// Full/abbrev time zone name or numeric offset
// Implementation omitted. Real support is quite complicated.
return ptr::null_mut();
}
//////////
// else //
//////////
_ => {
// We do not recognize this specifier
return ptr::null_mut();
}
}
}
// If we got here, parsing was successful. Return pointer to the
// next unparsed character in `buf`.
let ret_ptr = buf.add(index_in_input);
ret_ptr as *mut c_char
}
// -----------------------
// Helper / Parsing Logic
// -----------------------
/// Borrows a NUL-terminated C string as a Rust `&str`.
///
/// A null pointer, or bytes that are not valid UTF-8, yield the empty string.
/// The length is measured with `strlen`, so the terminating NUL is excluded.
unsafe fn cstr_to_str<'a>(ptr: *const c_char) -> &'a str {
    if !ptr.is_null() {
        let byte_count = strlen(ptr);
        let raw = slice::from_raw_parts(ptr as *const u8, byte_count);
        // Invalid UTF-8 falls through to the empty-string result below.
        if let Ok(text) = str::from_utf8(raw) {
            return text;
        }
    }
    ""
}
/// Counts the bytes that precede the terminating NUL of a C string.
///
/// A null pointer is treated as an empty string and yields 0.
unsafe fn strlen(mut ptr: *const c_char) -> usize {
    if ptr.is_null() {
        return 0;
    }
    let mut len = 0;
    loop {
        // Stop at the terminator; it is not included in the length.
        if *ptr == 0 {
            return len;
        }
        ptr = ptr.add(1);
        len += 1;
    }
}
/// Parse an integer from the beginning of `input_str`.
///
/// - `width` is the maximum number of digits to parse
/// - `allow_variable_width` indicates whether more than `width` digits may be consumed
/// (e.g., `%Y` is not limited to 4 digits: it parses "2023" as well as "12345").
fn parse_int(input_str: &str, width: usize, allow_variable_width: bool) -> Option<(i32, usize)> {
fn parse_int(input: &str, width: usize, allow_variable: bool) -> Option<(i32, usize)> {
let mut val = 0i32;
let mut chars = input.chars();
let mut count = 0;
let mut value: i32 = 0;
let chars: Vec<char> = input_str.chars().collect();
for c in chars.iter() {
while let Some(c) = chars.next() {
if !c.is_ascii_digit() {
break;
}
value = value * 10 + (*c as u8 as i32 - '0' as i32);
// Check for integer overflow
val = val.checked_mul(10)?.checked_add((c as u8 - b'0') as i32)?;
count += 1;
if count == width && !allow_variable_width {
if count == width && !allow_variable {
break;
}
}
if count == 0 {
return None; // no digits found
None
} else {
Some((val, count))
}
Some((value, count))
}
/// Handle AM/PM. Returns (is_pm, length_consumed).
......@@ -468,8 +482,8 @@ fn parse_am_pm(s: &str) -> Option<(bool, usize)> {
}
/// Parse a weekday name from `s`.
/// - If `abbrev == true`, match short forms: "Sun".."Sat"
/// - Otherwise, match "Sunday".."Saturday"
/// - if `abbrev == true`, match short forms: "Sun".."Sat"
/// - otherwise, match "Sunday".."Saturday"
/// Return (weekday_index, length_consumed).
fn parse_weekday(s: &str, abbrev: bool) -> Option<(usize, usize)> {
let list = if abbrev { &SHORT_DAYS } else { &LONG_DAYS };
......@@ -499,8 +513,7 @@ fn parse_month(s: &str, abbrev: bool) -> Option<(usize, usize)> {
/// Return how many characters of `input` were consumed or None on error.
unsafe fn apply_subformat(input: &str, subfmt: &str, tm: *mut tm) -> Option<usize> {
// We'll do a temporary strptime call on a substring.
// Then we see how many chars it consumed.
// If that call fails, we return None.
// Then we see how many chars it consumed. If that call fails, we return None.
// Otherwise, we return the count.
// Convert `input` to a null-terminated buffer temporarily
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment