From b65cd4e5119c4ff87ed48e7b872f1b69115c0255 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Florian=20Mei=C3=9Fner?= <florian.meissner@mailbox.org>
Date: Fri, 20 Oct 2023 14:34:06 +0000
Subject: [PATCH] getline, getdelim: fix issues, extend tests

---
 src/header/stdio/getdelim.rs                  | 114 ++++++++++--
 tests/Makefile                                |   1 +
 .../expected/bins_static/stdio/getline.stderr |   0
 .../expected/bins_static/stdio/getline.stdout |  41 +++++
 tests/stdio/getline.c                         | 167 ++++++++++++++++++
 tests/stdio/getline.in                        |   7 +
 tests/test_helpers.h                          |  24 +++
 7 files changed, 342 insertions(+), 12 deletions(-)
 create mode 100644 tests/expected/bins_static/stdio/getline.stderr
 create mode 100644 tests/expected/bins_static/stdio/getline.stdout
 create mode 100644 tests/stdio/getline.c
 create mode 100644 tests/stdio/getline.in

diff --git a/src/header/stdio/getdelim.rs b/src/header/stdio/getdelim.rs
index 59b2aa04e..c5242e310 100644
--- a/src/header/stdio/getdelim.rs
+++ b/src/header/stdio/getdelim.rs
@@ -1,31 +1,80 @@
-use alloc::vec::Vec;
-use core::ptr;
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html
+
+use alloc::{string::String, vec::Vec};
+use core::{fmt::Write, intrinsics::unlikely, ops::Deref, ptr};
 
 use crate::{
-    header::{stdio::FILE, stdlib},
+    header::{
+        errno::{EINVAL, ENOMEM, ENOSPC, EOVERFLOW},
+        stdio,
+        stdio::FILE,
+        stdlib,
+    },
     io::BufRead,
+    platform,
     platform::types::*,
 };
 
+use crate::{
+    header::stdio::{default_stdout, feof, ferror, F_EOF, F_ERR},
+    platform::errno,
+};
+
+/// see getdelim (getline is a special case of getdelim with delim == '\n')
 #[no_mangle]
-pub unsafe extern "C" fn __getline(
+pub unsafe extern "C" fn getline(
     lineptr: *mut *mut c_char,
     n: *mut size_t,
     stream: *mut FILE,
 ) -> ssize_t {
-    __getdelim(lineptr, n, b'\n' as c_int, stream)
+    getdelim(lineptr, n, b'\n' as c_int, stream)
 }
 
+// One *could* read the standard as 'getdelim sets the stream error flag on *any* error, though
+// since glibc doesn't seem to do this, I won't either
+
+/// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html
+///
+/// # Safety
+/// - `lineptr, *lineptr, `n`, `stream` pointers must be valid and have to be aligned.
+/// - `stream` has to be a valid file handle returned by fopen and likes.
+///
+/// # Deviation from POSIX
+/// - **EINVAL is set on stream being NULL or delim not fitting into char** (POSIX allows UB)
+/// - **`*n` can contain invalid data.** The buffer size `n` is not read, instead realloc is called each time. That is in principle
+/// inefficent since the buffer is reallocated in memory for every call, but if `n` is by mistake
+/// bigger than the number of bytes allocated for the buffer, there can be no out-of-bounds write.
+/// - On non-stream-related errors, the error indicator of the stream is *not* set. Posix states
+/// "If an error occurs, the error indicator for the stream shall be set, and the function shall
+/// return -1 and set errno to indicate the error." but in cases that produce EINVAL even glibc
+/// doesn't seem to set the error indicator, so we also don't.
 #[no_mangle]
-pub unsafe extern "C" fn __getdelim(
+pub unsafe extern "C" fn getdelim(
     lineptr: *mut *mut c_char,
     n: *mut size_t,
     delim: c_int,
     stream: *mut FILE,
 ) -> ssize_t {
-    let lineptr = &mut *lineptr;
-    let n = &mut *n;
-    let delim = delim as u8;
+    let (lineptr, n, stream) =
+        if let (Some(ptr), Some(n), Some(file)) = (lineptr.as_mut(), n.as_mut(), stream.as_mut()) {
+            (ptr, n, file)
+        } else {
+            errno = EINVAL;
+            return -1 as ssize_t;
+        };
+
+    if feof(stream) != 0 || ferror(stream) != 0 {
+        return -1 as ssize_t;
+    }
+
+    // POSIX specifies UB but we test anyway
+    // returning EINVAL in that case
+    let delim: u8 = if let Ok(delim) = delim.try_into() {
+        delim
+    } else {
+        errno = EINVAL;
+        return -1;
+    };
 
     //TODO: More efficient algorithm using lineptr and n instead of this vec
     let mut buf = Vec::new();
@@ -33,15 +82,51 @@ pub unsafe extern "C" fn __getdelim(
         let mut stream = (*stream).lock();
         match stream.read_until(delim, &mut buf) {
             Ok(ok) => ok,
-            Err(err) => return -1,
+            Err(err) => {
+                stream.flags &= F_ERR;
+                return -1;
+            }
         }
     };
 
+    // "[EOVERFLOW]
+    // The number of bytes to be written into the buffer, including the delimiter character (if encountered), would exceed {SSIZE_MAX}."
+    if unlikely(count > ssize_t::MAX as usize) {
+        errno = EOVERFLOW;
+        return -1;
+    }
+
+    // we reached EOF if either
+    // - we have no last elem (because vec is empty), or
+    // - the last elem doesn't match the delimiter
+    let eof_reached = if let Some(last) = buf.last() {
+        *last == delim
+    } else {
+        true
+    };
+
+    // "If the end-of-file indicator for the stream is set, or if no characters were read and the
+    // stream is at end-of-file, the end-of-file indicator for the stream shall be set and the
+    // function shall return -1."
+    if eof_reached {
+        stream.flags &= F_EOF;
+        if count == 0 {
+            return -1;
+        }
+    }
+
     //TODO: Check errors and improve safety
     {
-        // Allocate lineptr to size of buf and set n to size of lineptr
+        // Allocate lineptr to size of buf plus NUL byte and set n to size of lineptr
         *n = count + 1;
+        // The advantage in always realloc'ing is that even if the user supplies a wrong n, this
+        // doesn't break
         *lineptr = stdlib::realloc(*lineptr as *mut c_void, *n) as *mut c_char;
+        if unlikely(lineptr.is_null() && *n != 0usize) {
+            // memory error; realloc returns NULL on alloc'ing 0 bytes
+            errno = ENOMEM;
+            return -1;
+        }
 
         // Copy buf to lineptr
         ptr::copy(buf.as_ptr(), *lineptr as *mut u8, count);
@@ -49,7 +134,12 @@ pub unsafe extern "C" fn __getdelim(
         // NUL terminate lineptr
         *lineptr.offset(count as isize) = 0;
 
+        // TODO remove
+        /*eprintln!(
+            "[DBG]{}: {}, {:?}, {:?}, {:?}", line!(),
+            String::from_utf8(buf).unwrap(), count, *n, *lineptr
+        );*/
         // Return allocated size
-        *n as ssize_t
+        count as ssize_t
     }
 }
diff --git a/tests/Makefile b/tests/Makefile
index 47a54ada8..5afa290a5 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -33,6 +33,7 @@ EXPECT_NAMES=\
 	stdio/fseek \
 	stdio/fwrite \
 	stdio/getc_unget \
+  stdio/getline \
 	stdio/mutex \
 	stdio/popen \
 	stdio/printf \
diff --git a/tests/expected/bins_static/stdio/getline.stderr b/tests/expected/bins_static/stdio/getline.stderr
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/expected/bins_static/stdio/getline.stdout b/tests/expected/bins_static/stdio/getline.stdout
new file mode 100644
index 000000000..ba753ef6b
--- /dev/null
+++ b/tests/expected/bins_static/stdio/getline.stdout
@@ -0,0 +1,41 @@
+  1: getline(NULL, NULL, stream)
+     => 22 (Invalid argument) - EINVAL
+  2: getline(NULL, &n, stream)
+     => 22 (Invalid argument) - EINVAL
+  3: getline(&lineptr, NULL, stream)
+     => 22 (Invalid argument) - EINVAL
+  4: getline(NULL, NULL, NULL)
+     => 22 (Invalid argument) - EINVAL
+  5: getline(&lineptr, NULL, NULL)
+     => 22 (Invalid argument) - EINVAL
+  6: getline(NULL, &n, NULL)
+     => 22 (Invalid argument) - EINVAL
+  7: getline(&lineptr, &n, NULL)
+     => 22 (Invalid argument) - EINVAL
+  8: getdelim(&lineptr, &n, 25600, stream)
+     => 22 (Invalid argument) - EINVAL
+
+  1: status = 27, strlen = 27, feof = 0, ferror = 0
+     |>Space: the final frontier.
+  2: status = 1, strlen = 1, feof = 0, ferror = 0
+     |>
+  3: status = 50, strlen = 50, feof = 0, ferror = 0
+     |>These are the voyages of the starship Enterprise.
+  4: status = 24, strlen = 24, feof = 0, ferror = 0
+     |>Its continuing mission:
+  5: status = 33, strlen = 33, feof = 0, ferror = 0
+     |>- to explore strange new worlds;
+  6: status = 46, strlen = 46, feof = 0, ferror = 0
+     |>- to seek out new life and new civilizations;
+  7: status = 45, strlen = 45, feof = 0, ferror = 0
+     |>- to boldly go where no one has gone before!
+
+overread 1, status = -1, feof = 1, ferror = 0
+|~- to boldly go where no one has gone before!
+
+overread 2, status = -1, feof = 1, ferror = 0
+|~- to boldly go where no one has gone before!
+
+overread 3, status = -1, feof = 1, ferror = 0
+|~- to boldly go where no one has gone before!
+
diff --git a/tests/stdio/getline.c b/tests/stdio/getline.c
new file mode 100644
index 000000000..299265fcf
--- /dev/null
+++ b/tests/stdio/getline.c
@@ -0,0 +1,167 @@
+// https://pubs.opengroup.org/onlinepubs/9699919799/functions/getline.html
+
+/// ssize_t getdelim(char **restrict lineptr, size_t *restrict n,
+///                  int delimiter, FILE *restrict stream);
+/// ssize_t getline(char **restrict lineptr, size_t *restrict n,
+///                 FILE *restrict stream);
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "test_helpers.h"
+
+const char   *INFILE_NAME   = "stdio/getline.in";
+const size_t INFILE_LINES   = 7;
+const size_t OVERREAD_LINES = 3;
+
+void test_null_args();
+
+void test_ferror();
+
+void test_read_and_overread();
+
+int main(void) {
+    setbuf(stdout, NULL);
+
+    // test if supplying NULL for either pointer correctly yields EINVAL
+    test_null_args();
+
+    // test reading stream with error flag set
+    test_ferror();
+
+    // "normal" case - read all `INFILE_LINES` lines of the test file and then overread by 3
+    test_read_and_overread();
+}
+
+void test_null_args() {
+
+
+    // Basics: NULL handling
+    // Test all combinations of NULL pointers since we fail on any one.
+    //
+    // I can't explicitly find that stream can't be NULL but nothing else
+    // makes sense, so let's return EINVAL
+#define TEST_FOR_EINVAL(x)                           \
+    do                                               \
+    {                                                \
+        size_t n = 0;                                \
+        char *lineptr = NULL;                        \
+        ssize_t status = 0;                          \
+        FILE *stream = NULL;                         \
+                                                     \
+        stream = fopen(INFILE_NAME, "r");            \
+        ERROR_IF(fopen, stream, == NULL);            \
+        ERROR_IF(fopen, ferror(stream), );           \
+                                                     \
+        status = x;                                  \
+        printf("%3zu: %s\n     => ", ++counter, #x); \
+        CHECK_AND_PRINT_ERRNO(EINVAL);               \
+        assert(status == -1 && errno == EINVAL);     \
+                                                     \
+        if (stream != NULL)                          \
+            fclose(stream);                          \
+        if (lineptr != NULL)                         \
+            free(lineptr);                           \
+        stream = NULL;                               \
+        errno = 0;                                   \
+    } while (0);
+
+        static size_t counter = 0;
+
+        TEST_FOR_EINVAL(getline(NULL, NULL, stream));
+
+        TEST_FOR_EINVAL(getline(NULL, &n, stream));
+        TEST_FOR_EINVAL(getline(&lineptr, NULL, stream));
+
+        // don't always use, as glibc doesn't tolerate stream being NULL or delim being out-of-range for char
+#ifdef _RELIBC_STDIO_H
+        TEST_FOR_EINVAL(getline(NULL, NULL, NULL));
+        TEST_FOR_EINVAL(getline(&lineptr, NULL, NULL));
+        TEST_FOR_EINVAL(getline(NULL, &n, NULL));
+        TEST_FOR_EINVAL(getline(&lineptr, &n, NULL));
+
+        // test if using delim out of char range correctly causes EINVAL
+        // POSIX specifies UB, so we try to be more helpful
+        TEST_FOR_EINVAL(getdelim(&lineptr, &n, 25600, stream));
+#endif
+
+#undef TEST_FOR_EINVAL
+    printf("\n");
+}
+
+void test_ferror() {
+    // TODO test behavior on error flag set
+}
+
+void test_read_and_overread() {
+    // Basic use cases
+    // Read all INFILE_LINES lines of sample input, then overread OVERREAD_LINES times
+
+    size_t  n        = 0;
+    char    *lineptr = NULL;
+    ssize_t status;
+    FILE    *stream  = NULL;
+
+    stream = fopen(INFILE_NAME, "r");
+
+    for (size_t i = 0; i < INFILE_LINES; ++i) {
+        /// "Upon successful completion, the getline() and getdelim() functions
+        /// shall return the number of bytes written into the buffer, including
+        /// the delimiter character if one was encountered before EOF, but
+        /// excluding the terminating NUL character."
+        printf("%3zu: ", i + 1);
+        status = getline(&lineptr, &n, stream);
+
+        /// "If the end-of-file indicator for the stream is set, or if no
+        /// characters were read and the stream is at end-of-file, the
+        /// end-of-file indicator for the stream shall be set and the function
+        /// shall return -1."
+        assert(!(status == -1 && feof(stream)));
+
+        /// "If an error occurs, the error indicator for the stream shall be
+        /// set, and the function shall return -1 and set errno to indicate the
+        /// error."
+        ERROR_IF(getline, status, == -1 && ferror(stream));
+
+        // This should only execute if the other error cases (with stream flags set) did not abort.
+        UNEXP_IF(getline, status, == -1);
+
+        // Print length and content to verify. Also test if the buffer is big
+        // enough and if strlen of input matches the return value.
+        // Although this doesn't HAVE to be true, as the input could contain
+        // NUL bytes, ours doesn't.
+        printf("status = %zi, strlen = %zu, feof = %s, ferror = %s\n     |>%s",
+               status, strlen(lineptr), feof(stream) ? "1" : "0", ferror(stream) ? "1" : "0", lineptr);
+        // fflush(stdout);
+
+        assert(strlen(lineptr) == (size_t) status); // we can cast to size_t since we
+        assert(n >= (size_t) status + 1);           // UNEXP_IF against status being -1
+    }
+    printf("\n");
+
+    // OVERREAD
+
+    for (size_t i = 0; i < OVERREAD_LINES; ++i) {
+        /// "If the end-of-file indicator for the stream is set, or if no
+        /// characters were read and the stream is at end-of-file, the
+        /// end-of-file indicator for the stream shall be set and the function
+        /// shall return -1."
+        status = getline(&lineptr, &n, stream);
+        printf("overread %zu, status = %zi, feof = %s, ferror = %s\n",
+                i + 1,
+                status,
+                feof(stream) ? "1" : "0", ferror(stream) ? "1" : "0");
+        if (i == 0) {
+            assert(status == -1);
+            assert(feof(stream));
+            assert(!ferror(stream));
+        }
+        printf("|~%s\n", lineptr);
+    }
+
+    // cleanup
+    fclose(stream);
+    free(lineptr);
+}
diff --git a/tests/stdio/getline.in b/tests/stdio/getline.in
new file mode 100644
index 000000000..8399a9b4c
--- /dev/null
+++ b/tests/stdio/getline.in
@@ -0,0 +1,7 @@
+Space: the final frontier.
+
+These are the voyages of the starship Enterprise.
+Its continuing mission:
+- to explore strange new worlds;
+- to seek out new life and new civilizations;
+- to boldly go where no one has gone before!
diff --git a/tests/test_helpers.h b/tests/test_helpers.h
index c03a8c105..5d7cd802a 100644
--- a/tests/test_helpers.h
+++ b/tests/test_helpers.h
@@ -71,6 +71,7 @@
             fprintf(stderr, _Generic((status), \
                 char *: "char*(%p) = \"%1$s\"", \
                 void *: "void*(%p)", \
+                ssize_t: "%li", \
                 default: "%i" \
             ), status); \
             fprintf(stderr, "\n"); \
@@ -90,4 +91,27 @@
 
 #define random_bool() (lrand48() % 2 == 0)
 
+// Quick helper for checking desired errno status.
+// Use as macro: CHECK_AND_PRINT_ERRNO(<desired errno, e.g. EINVAL>);
+// If errno is as expected, prints this; otherwise, prints expected vs. actual and exit.
+void printf_errno(int errnoval, char *errnoname) {
+
+    printf("%d (%s)",
+               errno, strerror(errno));
+    if (errno != errnoval) {
+        printf("\n^^^^^ FAILURE ^^^^^ (SHOULD BE %d - %s (%s))\n",
+               errnoval, errnoname, strerror(errnoval));
+    } else {
+        printf(" - %s\n", errnoname);
+    }
+}
+
+#define CHECK_AND_PRINT_ERRNO(errnoval)     \
+    do {                                    \
+        printf_errno(errnoval, #errnoval);  \
+        if (errnoval != errno) {            \
+            exit(EXIT_FAILURE);             \
+        }                                   \
+    } while(0);
+
 #endif /* _TEST_HELPERS */
-- 
GitLab