diff --git a/src/header/stdio/getdelim.rs b/src/header/stdio/getdelim.rs index 59b2aa04edb5823de948251cf027069aa52345e7..c5242e310c6653958d289c9f0ac254fcd4e172fe 100644 --- a/src/header/stdio/getdelim.rs +++ b/src/header/stdio/getdelim.rs @@ -1,31 +1,80 @@ -use alloc::vec::Vec; -use core::ptr; +// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html + +use alloc::{string::String, vec::Vec}; +use core::{fmt::Write, intrinsics::unlikely, ops::Deref, ptr}; use crate::{ - header::{stdio::FILE, stdlib}, + header::{ + errno::{EINVAL, ENOMEM, ENOSPC, EOVERFLOW}, + stdio, + stdio::FILE, + stdlib, + }, io::BufRead, + platform, platform::types::*, }; +use crate::{ + header::stdio::{default_stdout, feof, ferror, F_EOF, F_ERR}, + platform::errno, +}; + +/// see getdelim (getline is a special case of getdelim with delim == '\n') #[no_mangle] -pub unsafe extern "C" fn __getline( +pub unsafe extern "C" fn getline( lineptr: *mut *mut c_char, n: *mut size_t, stream: *mut FILE, ) -> ssize_t { - __getdelim(lineptr, n, b'\n' as c_int, stream) + getdelim(lineptr, n, b'\n' as c_int, stream) } +// One *could* read the standard as 'getdelim sets the stream error flag on *any* error, though +// since glibc doesn't seem to do this, I won't either + +/// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html +/// +/// # Safety +/// - `lineptr, *lineptr, `n`, `stream` pointers must be valid and have to be aligned. +/// - `stream` has to be a valid file handle returned by fopen and likes. +/// +/// # Deviation from POSIX +/// - **EINVAL is set on stream being NULL or delim not fitting into char** (POSIX allows UB) +/// - **`*n` can contain invalid data.** The buffer size `n` is not read, instead realloc is called each time. That is in principle +/// inefficent since the buffer is reallocated in memory for every call, but if `n` is by mistake +/// bigger than the number of bytes allocated for the buffer, there can be no out-of-bounds write. +/// - On non-stream-related errors, the error indicator of the stream is *not* set. Posix states +/// "If an error occurs, the error indicator for the stream shall be set, and the function shall +/// return -1 and set errno to indicate the error." but in cases that produce EINVAL even glibc +/// doesn't seem to set the error indicator, so we also don't. #[no_mangle] -pub unsafe extern "C" fn __getdelim( +pub unsafe extern "C" fn getdelim( lineptr: *mut *mut c_char, n: *mut size_t, delim: c_int, stream: *mut FILE, ) -> ssize_t { - let lineptr = &mut *lineptr; - let n = &mut *n; - let delim = delim as u8; + let (lineptr, n, stream) = + if let (Some(ptr), Some(n), Some(file)) = (lineptr.as_mut(), n.as_mut(), stream.as_mut()) { + (ptr, n, file) + } else { + errno = EINVAL; + return -1 as ssize_t; + }; + + if feof(stream) != 0 || ferror(stream) != 0 { + return -1 as ssize_t; + } + + // POSIX specifies UB but we test anyway + // returning EINVAL in that case + let delim: u8 = if let Ok(delim) = delim.try_into() { + delim + } else { + errno = EINVAL; + return -1; + }; //TODO: More efficient algorithm using lineptr and n instead of this vec let mut buf = Vec::new(); @@ -33,15 +82,51 @@ pub unsafe extern "C" fn __getdelim( let mut stream = (*stream).lock(); match stream.read_until(delim, &mut buf) { Ok(ok) => ok, - Err(err) => return -1, + Err(err) => { + stream.flags &= F_ERR; + return -1; + } } }; + // "[EOVERFLOW] + // The number of bytes to be written into the buffer, including the delimiter character (if encountered), would exceed {SSIZE_MAX}." + if unlikely(count > ssize_t::MAX as usize) { + errno = EOVERFLOW; + return -1; + } + + // we reached EOF if either + // - we have no last elem (because vec is empty), or + // - the last elem doesn't match the delimiter + let eof_reached = if let Some(last) = buf.last() { + *last == delim + } else { + true + }; + + // "If the end-of-file indicator for the stream is set, or if no characters were read and the + // stream is at end-of-file, the end-of-file indicator for the stream shall be set and the + // function shall return -1." + if eof_reached { + stream.flags &= F_EOF; + if count == 0 { + return -1; + } + } + //TODO: Check errors and improve safety { - // Allocate lineptr to size of buf and set n to size of lineptr + // Allocate lineptr to size of buf plus NUL byte and set n to size of lineptr *n = count + 1; + // The advantage in always realloc'ing is that even if the user supplies a wrong n, this + // doesn't break *lineptr = stdlib::realloc(*lineptr as *mut c_void, *n) as *mut c_char; + if unlikely(lineptr.is_null() && *n != 0usize) { + // memory error; realloc returns NULL on alloc'ing 0 bytes + errno = ENOMEM; + return -1; + } // Copy buf to lineptr ptr::copy(buf.as_ptr(), *lineptr as *mut u8, count); @@ -49,7 +134,12 @@ pub unsafe extern "C" fn __getdelim( // NUL terminate lineptr *lineptr.offset(count as isize) = 0; + // TODO remove + /*eprintln!( + "[DBG]{}: {}, {:?}, {:?}, {:?}", line!(), + String::from_utf8(buf).unwrap(), count, *n, *lineptr + );*/ // Return allocated size - *n as ssize_t + count as ssize_t } } diff --git a/tests/Makefile b/tests/Makefile index 47a54ada8fad63bdf5feedccf5f0711141070ab7..5afa290a53585e2aab104cd3c591a0bb0aa3750b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -33,6 +33,7 @@ EXPECT_NAMES=\ stdio/fseek \ stdio/fwrite \ stdio/getc_unget \ + stdio/getline \ stdio/mutex \ stdio/popen \ stdio/printf \ diff --git a/tests/expected/bins_static/stdio/getline.stderr b/tests/expected/bins_static/stdio/getline.stderr new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/expected/bins_static/stdio/getline.stdout b/tests/expected/bins_static/stdio/getline.stdout new file mode 100644 index 0000000000000000000000000000000000000000..ba753ef6b2873b22d79fcad2615c2f35826185ad --- /dev/null +++ b/tests/expected/bins_static/stdio/getline.stdout @@ -0,0 +1,41 @@ + 1: getline(NULL, NULL, stream) + => 22 (Invalid argument) - EINVAL + 2: getline(NULL, &n, stream) + => 22 (Invalid argument) - EINVAL + 3: getline(&lineptr, NULL, stream) + => 22 (Invalid argument) - EINVAL + 4: getline(NULL, NULL, NULL) + => 22 (Invalid argument) - EINVAL + 5: getline(&lineptr, NULL, NULL) + => 22 (Invalid argument) - EINVAL + 6: getline(NULL, &n, NULL) + => 22 (Invalid argument) - EINVAL + 7: getline(&lineptr, &n, NULL) + => 22 (Invalid argument) - EINVAL + 8: getdelim(&lineptr, &n, 25600, stream) + => 22 (Invalid argument) - EINVAL + + 1: status = 27, strlen = 27, feof = 0, ferror = 0 + |>Space: the final frontier. + 2: status = 1, strlen = 1, feof = 0, ferror = 0 + |> + 3: status = 50, strlen = 50, feof = 0, ferror = 0 + |>These are the voyages of the starship Enterprise. + 4: status = 24, strlen = 24, feof = 0, ferror = 0 + |>Its continuing mission: + 5: status = 33, strlen = 33, feof = 0, ferror = 0 + |>- to explore strange new worlds; + 6: status = 46, strlen = 46, feof = 0, ferror = 0 + |>- to seek out new life and new civilizations; + 7: status = 45, strlen = 45, feof = 0, ferror = 0 + |>- to boldly go where no one has gone before! + +overread 1, status = -1, feof = 1, ferror = 0 +|~- to boldly go where no one has gone before! + +overread 2, status = -1, feof = 1, ferror = 0 +|~- to boldly go where no one has gone before! + +overread 3, status = -1, feof = 1, ferror = 0 +|~- to boldly go where no one has gone before! + diff --git a/tests/stdio/getline.c b/tests/stdio/getline.c new file mode 100644 index 0000000000000000000000000000000000000000..299265fcf6f5b46fd2f267e3233dec3974ad3fcf --- /dev/null +++ b/tests/stdio/getline.c @@ -0,0 +1,167 @@ +// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html + +/// ssize_t getdelim(char **restrict lineptr, size_t *restrict n, +/// int delimiter, FILE *restrict stream); +/// ssize_t getline(char **restrict lineptr, size_t *restrict n, +/// FILE *restrict stream); + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> + +#include "test_helpers.h" + +const char *INFILE_NAME = "stdio/getline.in"; +const size_t INFILE_LINES = 7; +const size_t OVERREAD_LINES = 3; + +void test_null_args(); + +void test_ferror(); + +void test_read_and_overread(); + +int main(void) { + setbuf(stdout, NULL); + + // test if supplying NULL for either pointer correctly yields EINVAL + test_null_args(); + + // test reading stream with error flag set + test_ferror(); + + // "normal" case - read all `INFILE_LINES` lines of the test file and then overread by 3 + test_read_and_overread(); +} + +void test_null_args() { + + + // Basics: NULL handling + // Test all combinations of NULL pointers since we fail on any one. + // + // I can't explicitly find that stream can't be NULL but nothing else + // makes sense, so let's return EINVAL +#define TEST_FOR_EINVAL(x) \ + do \ + { \ + size_t n = 0; \ + char *lineptr = NULL; \ + ssize_t status = 0; \ + FILE *stream = NULL; \ + \ + stream = fopen(INFILE_NAME, "r"); \ + ERROR_IF(fopen, stream, == NULL); \ + ERROR_IF(fopen, ferror(stream), ); \ + \ + status = x; \ + printf("%3zu: %s\n => ", ++counter, #x); \ + CHECK_AND_PRINT_ERRNO(EINVAL); \ + assert(status == -1 && errno == EINVAL); \ + \ + if (stream != NULL) \ + fclose(stream); \ + if (lineptr != NULL) \ + free(lineptr); \ + stream = NULL; \ + errno = 0; \ + } while (0); + + static size_t counter = 0; + + TEST_FOR_EINVAL(getline(NULL, NULL, stream)); + + TEST_FOR_EINVAL(getline(NULL, &n, stream)); + TEST_FOR_EINVAL(getline(&lineptr, NULL, stream)); + + // don't always use, as glibc doesn't tolerate stream being NULL or delim being out-of-range for char +#ifdef _RELIBC_STDIO_H + TEST_FOR_EINVAL(getline(NULL, NULL, NULL)); + TEST_FOR_EINVAL(getline(&lineptr, NULL, NULL)); + TEST_FOR_EINVAL(getline(NULL, &n, NULL)); + TEST_FOR_EINVAL(getline(&lineptr, &n, NULL)); + + // test if using delim out of char range correctly causes EINVAL + // POSIX specifies UB, so we try to be more helpful + TEST_FOR_EINVAL(getdelim(&lineptr, &n, 25600, stream)); +#endif + +#undef TEST_FOR_EINVAL + printf("\n"); +} + +void test_ferror() { + // TODO test behavior on error flag set +} + +void test_read_and_overread() { + // Basic use cases + // Read all INFILE_LINES lines of sample input, then overread OVERREAD_LINES times + + size_t n = 0; + char *lineptr = NULL; + ssize_t status; + FILE *stream = NULL; + + stream = fopen(INFILE_NAME, "r"); + + for (size_t i = 0; i < INFILE_LINES; ++i) { + /// "Upon successful completion, the getline() and getdelim() functions + /// shall return the number of bytes written into the buffer, including + /// the delimiter character if one was encountered before EOF, but + /// excluding the terminating NUL character." + printf("%3zu: ", i + 1); + status = getline(&lineptr, &n, stream); + + /// "If the end-of-file indicator for the stream is set, or if no + /// characters were read and the stream is at end-of-file, the + /// end-of-file indicator for the stream shall be set and the function + /// shall return -1." + assert(!(status == -1 && feof(stream))); + + /// "If an error occurs, the error indicator for the stream shall be + /// set, and the function shall return -1 and set errno to indicate the + /// error." + ERROR_IF(getline, status, == -1 && ferror(stream)); + + // This should only execute if the other error cases (with stream flags set) did not abort. + UNEXP_IF(getline, status, == -1); + + // Print length and content to verify. Also test if the buffer is big + // enough and if strlen of input matches the return value. + // Although this doesn't HAVE to be true, as the input could contain + // NUL bytes, ours doesn't. + printf("status = %zi, strlen = %zu, feof = %s, ferror = %s\n |>%s", + status, strlen(lineptr), feof(stream) ? "1" : "0", ferror(stream) ? "1" : "0", lineptr); + // fflush(stdout); + + assert(strlen(lineptr) == (size_t) status); // we can cast to size_t since we + assert(n >= (size_t) status + 1); // UNEXP_IF against status being -1 + } + printf("\n"); + + // OVERREAD + + for (size_t i = 0; i < OVERREAD_LINES; ++i) { + /// "If the end-of-file indicator for the stream is set, or if no + /// characters were read and the stream is at end-of-file, the + /// end-of-file indicator for the stream shall be set and the function + /// shall return -1." + status = getline(&lineptr, &n, stream); + printf("overread %zu, status = %zi, feof = %s, ferror = %s\n", + i + 1, + status, + feof(stream) ? "1" : "0", ferror(stream) ? "1" : "0"); + if (i == 0) { + assert(status == -1); + assert(feof(stream)); + assert(!ferror(stream)); + } + printf("|~%s\n", lineptr); + } + + // cleanup + fclose(stream); + free(lineptr); +} diff --git a/tests/stdio/getline.in b/tests/stdio/getline.in new file mode 100644 index 0000000000000000000000000000000000000000..8399a9b4cb2ce066cbae72a7056ce95b4543294d --- /dev/null +++ b/tests/stdio/getline.in @@ -0,0 +1,7 @@ +Space: the final frontier. + +These are the voyages of the starship Enterprise. +Its continuing mission: +- to explore strange new worlds; +- to seek out new life and new civilizations; +- to boldly go where no one has gone before! diff --git a/tests/test_helpers.h b/tests/test_helpers.h index c03a8c1059397920da67bb53515ae37b7d925cd5..5d7cd802a2372d970419c239a58a011481087cff 100644 --- a/tests/test_helpers.h +++ b/tests/test_helpers.h @@ -71,6 +71,7 @@ fprintf(stderr, _Generic((status), \ char *: "char*(%p) = \"%1$s\"", \ void *: "void*(%p)", \ + ssize_t: "%li", \ default: "%i" \ ), status); \ fprintf(stderr, "\n"); \ @@ -90,4 +91,27 @@ #define random_bool() (lrand48() % 2 == 0) +// Quick helper for checking desired errno status. +// Use as macro: CHECK_AND_PRINT_ERRNO(<desired errno, e.g. EINVAL>); +// If errno is as expected, prints this; otherwise, prints expected vs. actual and exit. +void printf_errno(int errnoval, char *errnoname) { + + printf("%d (%s)", + errno, strerror(errno)); + if (errno != errnoval) { + printf("\n^^^^^ FAILURE ^^^^^ (SHOULD BE %d - %s (%s))\n", + errnoval, errnoname, strerror(errnoval)); + } else { + printf(" - %s\n", errnoname); + } +} + +#define CHECK_AND_PRINT_ERRNO(errnoval) \ + do { \ + printf_errno(errnoval, #errnoval); \ + if (errnoval != errno) { \ + exit(EXIT_FAILURE); \ + } \ + } while(0); + #endif /* _TEST_HELPERS */