#[cfg(all(not(target_os = "redox"), not(fuzzing)))]
mod fuse;
#[cfg(all(not(target_os = "redox"), fuzzing))]
pub mod fuse;
#[cfg(not(target_os = "redox"))]
pub use self::fuse::mount;
#[cfg(target_os = "redox")]
mod redox;
#[cfg(target_os = "redox")]
pub use self::redox::mount;
use redox_scheme::{RequestKind, SignalBehavior, Socket, V2};
use std::io;
use std::path::Path;
use std::sync::atomic::Ordering;
use crate::{Disk, FileSystem, Transaction, IS_UMT};
use self::scheme::FileScheme;
pub mod resource;
pub mod scheme;
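/// Mount `filesystem` as a scheme named after `mountpoint`, calling `callback`
/// once with the path at which it becomes reachable, then handle scheme
/// requests until the filesystem is unmounted or the socket is closed.
///
/// A minimal usage sketch, assuming the `DiskSparse` backend and the
/// `FileSystem::create` signature used by this revision's tests:
///
/// ```no_run
/// use redoxfs::{mount, DiskSparse, FileSystem};
///
/// let disk = DiskSparse::create("redoxfs.img", 1024 * 1024 * 1024).unwrap();
/// let fs = FileSystem::create(disk, None, 0, 0).unwrap();
/// mount(fs, "example", |real_path| {
///     println!("filesystem reachable at {}", real_path.display());
/// })
/// .unwrap();
/// ```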
pub fn mount<D, P, T, F>(filesystem: FileSystem<D>, mountpoint: P, mut callback: F) -> io::Result<T>
where
D: Disk,
P: AsRef<Path>,
F: FnOnce(&Path) -> T,
{
let mountpoint = mountpoint.as_ref();
let socket = Socket::<V2>::create(&format!("{}", mountpoint.display()))?;
let mounted_path = format!("/scheme/{}", mountpoint.display());
let res = callback(Path::new(&mounted_path));
let mut scheme = FileScheme::new(format!("{}", mountpoint.display()), filesystem);
while IS_UMT.load(Ordering::SeqCst) == 0 {
let req = match socket.next_request(SignalBehavior::Restart)? {
None => break,
Some(req) => {
if let RequestKind::Call(r) = req.kind() {
r
} else {
// TODO: Redoxfs does not yet support asynchronous file IO. It might still make
// sense to implement cancellation for huge buffers, e.g. dd bs=1G
continue;
}
}
};
let response = req.handle_scheme_mut(&mut scheme);
if !socket.write_response(response, SignalBehavior::Restart)? {
break;
}
}
// Squash allocations and sync on unmount
let _ = Transaction::new(&mut scheme.fs).commit(true);
Ok(res)
}
use std::slice;
use std::time::{SystemTime, UNIX_EPOCH};
use alloc::collections::BTreeMap;
use libredox::call::MmapArgs;
use range_tree::RangeTree;
use syscall::data::{Stat, TimeSpec};
use syscall::error::{Error, Result, EBADF, EINVAL, EISDIR, EPERM};
use syscall::flag::{
MapFlags, F_GETFL, F_SETFL, MODE_PERM, O_ACCMODE, O_APPEND, O_RDONLY, O_RDWR, O_WRONLY,
PROT_READ, PROT_WRITE,
};
use syscall::{EBADFD, PAGE_SIZE};
use crate::{Disk, Node, Transaction, TreePtr};
pub type Fmaps = BTreeMap<u32, FileMmapInfo>;
pub trait Resource<D: Disk> {
fn parent_ptr_opt(&self) -> Option<TreePtr<Node>>;
fn node_ptr(&self) -> TreePtr<Node>;
fn uid(&self) -> u32;
fn dup(&self) -> Result<Box<dyn Resource<D>>>;
fn set_path(&mut self, path: &str);
fn read(&mut self, buf: &mut [u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize>;
fn write(&mut self, buf: &[u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize>;
fn fsize(&mut self, tx: &mut Transaction<D>) -> Result<u64>;
fn fmap(
&mut self,
fmaps: &mut Fmaps,
flags: MapFlags,
size: usize,
offset: u64,
tx: &mut Transaction<D>,
) -> Result<usize>;
fn funmap(
&mut self,
fmaps: &mut Fmaps,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<usize>;
fn fchmod(&mut self, mode: u16, tx: &mut Transaction<D>) -> Result<usize> {
let mut node = tx.read_tree(self.node_ptr())?;
if node.data().uid() == self.uid() || self.uid() == 0 {
let old_mode = node.data().mode();
let new_mode = (old_mode & !MODE_PERM) | (mode & MODE_PERM);
if old_mode != new_mode {
node.data_mut().set_mode(new_mode);
tx.sync_tree(node)?;
}
Ok(0)
} else {
Err(Error::new(EPERM))
}
}
fn fchown(&mut self, uid: u32, gid: u32, tx: &mut Transaction<D>) -> Result<usize> {
let mut node = tx.read_tree(self.node_ptr())?;
let old_uid = node.data().uid();
if old_uid == self.uid() || self.uid() == 0 {
let mut node_changed = false;
if uid as i32 != -1 {
if uid != old_uid {
node.data_mut().set_uid(uid);
node_changed = true;
}
}
if gid as i32 != -1 {
let old_gid = node.data().gid();
if gid != old_gid {
node.data_mut().set_gid(gid);
node_changed = true;
}
}
if node_changed {
tx.sync_tree(node)?;
}
Ok(0)
} else {
Err(Error::new(EPERM))
}
}
fn fcntl(&mut self, cmd: usize, arg: usize) -> Result<usize>;
fn path(&self) -> &str;
fn stat(&self, stat: &mut Stat, tx: &mut Transaction<D>) -> Result<usize> {
let node = tx.read_tree(self.node_ptr())?;
let ctime = node.data().ctime();
let mtime = node.data().mtime();
let atime = node.data().atime();
*stat = Stat {
st_dev: 0, // TODO
st_ino: node.id() as u64,
st_mode: node.data().mode(),
st_nlink: node.data().links(),
st_uid: node.data().uid(),
st_gid: node.data().gid(),
st_size: node.data().size(),
st_mtime: mtime.0,
st_mtime_nsec: mtime.1,
st_atime: atime.0,
st_atime_nsec: atime.1,
st_ctime: ctime.0,
st_ctime_nsec: ctime.1,
..Default::default()
};
Ok(0)
}
fn sync(&mut self, fmaps: &mut Fmaps, tx: &mut Transaction<D>) -> Result<usize>;
fn truncate(&mut self, len: usize, tx: &mut Transaction<D>) -> Result<usize>;
fn utimens(&mut self, times: &[TimeSpec], tx: &mut Transaction<D>) -> Result<usize>;
}
pub struct DirResource {
path: String,
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
data: Option<Vec<u8>>,
uid: u32,
}
impl DirResource {
pub fn new(
path: String,
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
data: Option<Vec<u8>>,
uid: u32,
) -> DirResource {
DirResource {
path,
parent_ptr_opt,
node_ptr,
data,
uid,
}
}
}
impl<D: Disk> Resource<D> for DirResource {
fn parent_ptr_opt(&self) -> Option<TreePtr<Node>> {
self.parent_ptr_opt
}
fn node_ptr(&self) -> TreePtr<Node> {
self.node_ptr
}
fn uid(&self) -> u32 {
self.uid
}
fn dup(&self) -> Result<Box<dyn Resource<D>>> {
Ok(Box::new(DirResource {
path: self.path.clone(),
parent_ptr_opt: self.parent_ptr_opt,
node_ptr: self.node_ptr,
data: self.data.clone(),
uid: self.uid,
}))
}
fn set_path(&mut self, path: &str) {
self.path = path.to_string();
}
fn read(&mut self, buf: &mut [u8], offset: u64, _tx: &mut Transaction<D>) -> Result<usize> {
let data = self.data.as_ref().ok_or(Error::new(EISDIR))?;
let src = usize::try_from(offset)
.ok()
.and_then(|o| data.get(o..))
.unwrap_or(&[]);
let byte_count = core::cmp::min(src.len(), buf.len());
buf[..byte_count].copy_from_slice(&src[..byte_count]);
Ok(byte_count)
}
fn write(&mut self, _buf: &[u8], _offset: u64, _tx: &mut Transaction<D>) -> Result<usize> {
Err(Error::new(EBADF))
}
fn fsize(&mut self, _tx: &mut Transaction<D>) -> Result<u64> {
Ok(self.data.as_ref().ok_or(Error::new(EBADF))?.len() as u64)
}
fn fmap(
&mut self,
_fmaps: &mut Fmaps,
_flags: MapFlags,
_size: usize,
_offset: u64,
_tx: &mut Transaction<D>,
) -> Result<usize> {
Err(Error::new(EBADF))
}
fn funmap(
&mut self,
_fmaps: &mut Fmaps,
_offset: u64,
_size: usize,
_tx: &mut Transaction<D>,
) -> Result<usize> {
Err(Error::new(EBADF))
}
fn fcntl(&mut self, _cmd: usize, _arg: usize) -> Result<usize> {
Err(Error::new(EBADF))
}
fn path(&self) -> &str {
&self.path
}
fn sync(&mut self, _fmaps: &mut Fmaps, _tx: &mut Transaction<D>) -> Result<usize> {
Err(Error::new(EBADF))
}
fn truncate(&mut self, _len: usize, _tx: &mut Transaction<D>) -> Result<usize> {
Err(Error::new(EBADF))
}
fn utimens(&mut self, _times: &[TimeSpec], _tx: &mut Transaction<D>) -> Result<usize> {
Err(Error::new(EBADF))
}
}
#[derive(Debug)]
pub struct Fmap {
rc: usize,
flags: MapFlags,
last_page_tail: u16,
}
impl Fmap {
pub unsafe fn new<D: Disk>(
node_ptr: TreePtr<Node>,
flags: MapFlags,
unaligned_size: usize,
offset: u64,
base: *mut u8,
tx: &mut Transaction<D>,
) -> Result<Self> {
// Memory provided to fmap must be page aligned and sized
let aligned_size = unaligned_size.next_multiple_of(syscall::PAGE_SIZE);
let address = base.add(offset as usize);
//println!("ADDR {:p} {:p}", base, address);
// Read buffer from disk
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let buf = slice::from_raw_parts_mut(address, unaligned_size);
let count = match tx.read_node(node_ptr, offset, buf, atime.as_secs(), atime.subsec_nanos())
{
Ok(ok) => ok,
Err(err) => {
let _ = libredox::call::munmap(address.cast(), aligned_size);
return Err(err);
}
};
// Make sure remaining data is zeroed
buf[count..].fill(0_u8);
Ok(Self {
rc: 1,
flags,
last_page_tail: (unaligned_size % PAGE_SIZE) as u16,
})
}
pub unsafe fn sync<D: Disk>(
&mut self,
node_ptr: TreePtr<Node>,
base: *mut u8,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<()> {
if self.flags & PROT_WRITE == PROT_WRITE {
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.write_node(
node_ptr,
offset,
unsafe { core::slice::from_raw_parts(base.add(offset as usize), size) },
mtime.as_secs(),
mtime.subsec_nanos(),
)?;
}
Ok(())
}
}
pub struct FileResource {
path: String,
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
flags: usize,
uid: u32,
}
#[derive(Debug)]
pub struct FileMmapInfo {
base: *mut u8,
size: usize,
ranges: RangeTree<Fmap>,
pub open_fds: usize,
}
impl Default for FileMmapInfo {
fn default() -> Self {
Self {
base: core::ptr::null_mut(),
size: 0,
ranges: RangeTree::new(),
open_fds: 0,
}
}
}
impl FileResource {
pub fn new(
path: String,
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
flags: usize,
uid: u32,
) -> FileResource {
FileResource {
path,
parent_ptr_opt,
node_ptr,
flags,
uid,
}
}
}
impl<D: Disk> Resource<D> for FileResource {
fn parent_ptr_opt(&self) -> Option<TreePtr<Node>> {
self.parent_ptr_opt
}
fn node_ptr(&self) -> TreePtr<Node> {
self.node_ptr
}
fn uid(&self) -> u32 {
self.uid
}
fn dup(&self) -> Result<Box<dyn Resource<D>>> {
Ok(Box::new(FileResource {
path: self.path.clone(),
parent_ptr_opt: self.parent_ptr_opt,
node_ptr: self.node_ptr,
flags: self.flags,
uid: self.uid,
}))
}
fn set_path(&mut self, path: &str) {
self.path = path.to_string();
}
fn read(&mut self, buf: &mut [u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE != O_RDWR && self.flags & O_ACCMODE != O_RDONLY {
return Err(Error::new(EBADF));
}
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.read_node(
self.node_ptr,
offset,
buf,
atime.as_secs(),
atime.subsec_nanos(),
)
}
fn write(&mut self, buf: &[u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE != O_RDWR && self.flags & O_ACCMODE != O_WRONLY {
return Err(Error::new(EBADF));
}
let effective_offset = if self.flags & O_APPEND == O_APPEND {
let node = tx.read_tree(self.node_ptr)?;
node.data().size()
} else {
offset
};
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.write_node(
self.node_ptr,
effective_offset,
buf,
mtime.as_secs(),
mtime.subsec_nanos(),
)
}
fn fsize(&mut self, tx: &mut Transaction<D>) -> Result<u64> {
let node = tx.read_tree(self.node_ptr)?;
Ok(node.data().size())
}
fn fmap(
&mut self,
fmaps: &mut Fmaps,
flags: MapFlags,
unaligned_size: usize,
offset: u64,
tx: &mut Transaction<D>,
) -> Result<usize> {
//dbg!(&self.fmaps);
let accmode = self.flags & O_ACCMODE;
if flags.contains(PROT_READ) && !(accmode == O_RDWR || accmode == O_RDONLY) {
return Err(Error::new(EBADF));
}
if flags.contains(PROT_WRITE) && !(accmode == O_RDWR || accmode == O_WRONLY) {
return Err(Error::new(EBADF));
}
let aligned_size = unaligned_size.next_multiple_of(PAGE_SIZE);
// TODO: PROT_EXEC? It is, however, unenforceable without restricting anonymous mmap, since a
// program can always map anonymous RW-, read from a file, then remap as R-E. But it might
// be usable as a hint, prohibiting direct executable mmaps at least.
// TODO: Pass entry directory to Resource trait functions, since the node_ptr can be
// obtained by the caller.
let fmap_info = fmaps
.get_mut(&self.node_ptr.id())
.ok_or(Error::new(EBADFD))?;
let new_size = (offset as usize + aligned_size).next_multiple_of(PAGE_SIZE);
if new_size > fmap_info.size {
fmap_info.base = if fmap_info.base.is_null() {
unsafe {
libredox::call::mmap(MmapArgs {
length: new_size,
// PRIVATE/SHARED doesn't matter once the pages are passed in the fmap
// handler.
prot: libredox::flag::PROT_READ | libredox::flag::PROT_WRITE,
flags: libredox::flag::MAP_PRIVATE,
offset: 0,
fd: !0,
addr: core::ptr::null_mut(),
})? as *mut u8
}
} else {
unsafe {
syscall::syscall5(
syscall::SYS_MREMAP,
fmap_info.base as usize,
fmap_info.size,
0,
new_size,
syscall::MremapFlags::empty().bits() | (PROT_READ | PROT_WRITE).bits(),
)? as *mut u8
}
};
fmap_info.size = new_size;
}
let affected_fmaps = fmap_info
.ranges
.remove_and_unused(offset..offset + aligned_size as u64);
for (range, v_opt) in affected_fmaps {
//dbg!(&range);
if let Some(mut fmap) = v_opt {
fmap.rc += 1;
fmap.flags |= flags;
fmap_info
.ranges
.insert(range.start, range.end - range.start, fmap);
} else {
let map = unsafe {
Fmap::new(
self.node_ptr,
flags,
unaligned_size,
offset,
fmap_info.base,
tx,
)?
};
fmap_info.ranges.insert(offset, aligned_size as u64, map);
}
}
//dbg!(&self.fmaps);
Ok(fmap_info.base as usize + offset as usize)
}
fn funmap(
&mut self,
fmaps: &mut Fmaps,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<usize> {
let fmap_info = fmaps
.get_mut(&self.node_ptr.id())
.ok_or(Error::new(EBADFD))?;
//dbg!(&self.fmaps);
//dbg!(self.fmaps.conflicts(offset..offset + size as u64).collect::<Vec<_>>());
#[allow(unused_mut)]
let mut affected_fmaps = fmap_info.ranges.remove(offset..offset + size as u64);
for (range, mut fmap) in affected_fmaps {
fmap.rc = fmap.rc.checked_sub(1).unwrap();
//log::info!("SYNCING {}..{}", range.start, range.end);
unsafe {
fmap.sync(
self.node_ptr,
fmap_info.base,
range.start,
(range.end - range.start) as usize,
tx,
)?;
}
if fmap.rc > 0 {
fmap_info
.ranges
.insert(range.start, range.end - range.start, fmap);
}
}
//dbg!(&self.fmaps);
Ok(0)
}
fn fcntl(&mut self, cmd: usize, arg: usize) -> Result<usize> {
match cmd {
F_GETFL => Ok(self.flags),
F_SETFL => {
self.flags = (self.flags & O_ACCMODE) | (arg & !O_ACCMODE);
Ok(0)
}
_ => Err(Error::new(EINVAL)),
}
}
fn path(&self) -> &str {
&self.path
}
fn sync(&mut self, fmaps: &mut Fmaps, tx: &mut Transaction<D>) -> Result<usize> {
if let Some(fmap_info) = fmaps.get_mut(&self.node_ptr.id()) {
for (range, fmap) in fmap_info.ranges.iter_mut() {
unsafe {
fmap.sync(
self.node_ptr,
fmap_info.base,
range.start,
(range.end - range.start) as usize,
tx,
)?;
}
}
}
Ok(0)
}
fn truncate(&mut self, len: usize, tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE == O_RDWR || self.flags & O_ACCMODE == O_WRONLY {
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.truncate_node(
self.node_ptr,
len as u64,
mtime.as_secs(),
mtime.subsec_nanos(),
)?;
Ok(0)
} else {
Err(Error::new(EBADF))
}
}
fn utimens(&mut self, times: &[TimeSpec], tx: &mut Transaction<D>) -> Result<usize> {
let mut node = tx.read_tree(self.node_ptr)?;
if node.data().uid() == self.uid || self.uid == 0 {
if let &[atime, mtime] = times {
let mut node_changed = false;
let old_mtime = node.data().mtime();
let new_mtime = (mtime.tv_sec as u64, mtime.tv_nsec as u32);
if old_mtime != new_mtime {
node.data_mut().set_mtime(new_mtime.0, new_mtime.1);
node_changed = true;
}
let old_atime = node.data().atime();
let new_atime = (atime.tv_sec as u64, atime.tv_nsec as u32);
if old_atime != new_atime {
node.data_mut().set_atime(new_atime.0, new_atime.1);
node_changed = true;
}
if node_changed {
tx.sync_tree(node)?;
}
}
Ok(0)
} else {
Err(Error::new(EPERM))
}
}
}
impl Drop for FileResource {
fn drop(&mut self) {
/*
if !self.fmaps.is_empty() {
eprintln!(
"redoxfs: file {} still has {} fmaps!",
self.path,
self.fmaps.len()
);
}
*/
}
}
impl range_tree::Value for Fmap {
type K = u64;
fn try_merge_forward(self, other: &Self) -> core::result::Result<Self, Self> {
if self.rc == other.rc && self.flags == other.flags && self.last_page_tail == 0 {
Ok(self)
} else {
Err(self)
}
}
fn try_merge_backwards(self, other: &Self) -> core::result::Result<Self, Self> {
if self.rc == other.rc && self.flags == other.flags && other.last_page_tail == 0 {
Ok(self)
} else {
Err(self)
}
}
#[allow(unused_variables)]
fn split(
self,
prev_range: Option<core::ops::Range<Self::K>>,
range: core::ops::Range<Self::K>,
next_range: Option<core::ops::Range<Self::K>>,
) -> (Option<Self>, Self, Option<Self>) {
(
prev_range.map(|_range| Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: 0,
}),
Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: if next_range.is_none() {
self.last_page_tail
} else {
0
},
},
next_range.map(|_range| Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: self.last_page_tail,
}),
)
}
}
use std::collections::BTreeMap;
use std::str;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use redox_scheme::{CallerCtx, OpenResult, SchemeMut};
use syscall::data::{Stat, StatVfs, TimeSpec};
use syscall::error::{
Error, Result, EACCES, EBADF, EBUSY, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, ENOTEMPTY,
EPERM, EXDEV,
};
use syscall::flag::{
EventFlags, MapFlags, O_ACCMODE, O_CREAT, O_DIRECTORY, O_EXCL, O_NOFOLLOW, O_RDONLY, O_RDWR,
O_STAT, O_SYMLINK, O_TRUNC, O_WRONLY,
};
use syscall::schemev2::NewFdFlags;
use syscall::{MunmapFlags, EBADFD};
use redox_path::{
canonicalize_to_standard, canonicalize_using_cwd, canonicalize_using_scheme, scheme_path,
RedoxPath,
};
use crate::{Disk, FileSystem, Node, Transaction, TreeData, TreePtr, BLOCK_SIZE};
use super::resource::{DirResource, FileResource, Resource};
pub struct FileScheme<D: Disk> {
name: String,
pub(crate) fs: FileSystem<D>,
next_id: AtomicUsize,
files: BTreeMap<usize, Box<dyn Resource<D>>>,
fmap: super::resource::Fmaps,
}
impl<D: Disk> FileScheme<D> {
pub fn new(name: String, fs: FileSystem<D>) -> FileScheme<D> {
FileScheme {
name,
fs,
next_id: AtomicUsize::new(1),
files: BTreeMap::new(),
fmap: BTreeMap::new(),
}
}
fn resolve_symlink(
scheme_name: &str,
tx: &mut Transaction<D>,
uid: u32,
gid: u32,
full_path: &str,
node: TreeData<Node>,
nodes: &mut Vec<(TreeData<Node>, String)>,
) -> Result<String> {
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
// symbolic link is relative to this part of the url
let mut working_dir =
dirname(full_path).unwrap_or(scheme_path(scheme_name).ok_or(Error::new(EINVAL))?);
// node of the link
let mut node = node;
for _ in 0..32 {
// XXX What should the limit be?
assert!(node.data().is_symlink());
let mut buf = [0; 4096];
let count = tx.read_node(
node.ptr(),
0,
&mut buf,
atime.as_secs(),
atime.subsec_nanos(),
)?;
let target = canonicalize_to_standard(
Some(&working_dir),
str::from_utf8(&buf[..count]).or(Err(Error::new(EINVAL)))?,
)
.ok_or(Error::new(EINVAL))?;
let target_as_path = RedoxPath::from_absolute(&target).ok_or(Error::new(EINVAL))?;
let (scheme, reference) = target_as_path.as_parts().ok_or(Error::new(EINVAL))?;
if scheme.as_ref() != scheme_name {
return Err(Error::new(EXDEV));
}
let target_reference = reference.to_string();
nodes.clear();
if let Some((next_node, next_node_name)) =
Self::path_nodes(scheme_name, tx, &target_reference, uid, gid, nodes)?
{
if !next_node.data().is_symlink() {
nodes.push((next_node, next_node_name));
return Ok(target_reference);
}
node = next_node;
working_dir = dirname(&target).ok_or(Error::new(EINVAL))?.to_string();
} else {
return Err(Error::new(ENOENT));
}
}
Err(Error::new(ELOOP))
}
fn path_nodes(
scheme_name: &str,
tx: &mut Transaction<D>,
path: &str,
uid: u32,
gid: u32,
nodes: &mut Vec<(TreeData<Node>, String)>,
) -> Result<Option<(TreeData<Node>, String)>> {
let mut parts = path.split('/').filter(|part| !part.is_empty());
let mut part_opt: Option<&str> = None;
let mut node_ptr = TreePtr::root();
let mut node_name = String::new();
loop {
let node_res = match part_opt {
None => tx.read_tree(node_ptr),
Some(part) => {
node_name = part.to_string();
tx.find_node(node_ptr, part)
}
};
part_opt = parts.next();
if let Some(part) = part_opt {
let node = node_res?;
if !node.data().permission(uid, gid, Node::MODE_EXEC) {
return Err(Error::new(EACCES));
}
if node.data().is_symlink() {
let mut url = String::new();
url.push_str(scheme_name);
url.push(':');
for (_parent, parent_name) in nodes.iter() {
url.push('/');
url.push_str(&parent_name);
}
Self::resolve_symlink(scheme_name, tx, uid, gid, &url, node, nodes)?;
node_ptr = nodes.last().unwrap().0.ptr();
} else if !node.data().is_dir() {
return Err(Error::new(ENOTDIR));
} else {
node_ptr = node.ptr();
nodes.push((node, part.to_string()));
}
} else {
match node_res {
Ok(node) => return Ok(Some((node, node_name))),
Err(err) => match err.errno {
ENOENT => return Ok(None),
_ => return Err(err),
},
}
}
}
}
}
/// Given a path with a scheme, return the containing directory (or scheme)
fn dirname(path: &str) -> Option<String> {
canonicalize_using_cwd(Some(path), "..")
}
impl<D: Disk> SchemeMut for FileScheme<D> {
fn xopen(&mut self, url: &str, flags: usize, ctx: &CallerCtx) -> Result<OpenResult> {
let CallerCtx { uid, gid, .. } = *ctx;
let path = url.trim_matches('/');
// println!("Open '{}' {:X}", path, flags);
//TODO: try to move things into one transaction
let scheme_name = &self.name;
let mut nodes = Vec::new();
let node_opt = self
.fs
.tx(|tx| Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes))?;
let parent_ptr_opt = nodes.last().map(|x| x.0.ptr());
let resource: Box<dyn Resource<D>> = match node_opt {
Some((node, _node_name)) => {
if flags & (O_CREAT | O_EXCL) == O_CREAT | O_EXCL {
return Err(Error::new(EEXIST));
} else if node.data().is_dir() {
if flags & O_ACCMODE == O_RDONLY {
if !node.data().permission(uid, gid, Node::MODE_READ) {
// println!("dir not readable {:o}", node.data().mode);
return Err(Error::new(EACCES));
}
let mut children = Vec::new();
self.fs.tx(|tx| tx.child_nodes(node.ptr(), &mut children))?;
let mut data = Vec::new();
for child in children.iter() {
if let Some(child_name) = child.name() {
if !data.is_empty() {
data.push(b'\n');
}
data.extend_from_slice(&child_name.as_bytes());
}
}
Box::new(DirResource::new(
path.to_string(),
parent_ptr_opt,
node.ptr(),
Some(data),
uid,
))
} else if flags & O_WRONLY == O_WRONLY {
// println!("{:X} & {:X}: EISDIR {}", flags, O_DIRECTORY, path);
return Err(Error::new(EISDIR));
} else {
Box::new(DirResource::new(
path.to_string(),
parent_ptr_opt,
node.ptr(),
None,
uid,
))
}
} else if node.data().is_symlink()
&& !(flags & O_STAT == O_STAT && flags & O_NOFOLLOW == O_NOFOLLOW)
&& flags & O_SYMLINK != O_SYMLINK
{
let mut resolve_nodes = Vec::new();
let full_path =
canonicalize_using_scheme(scheme_name, url).ok_or(Error::new(EINVAL))?;
let resolved = self.fs.tx(|tx| {
Self::resolve_symlink(
scheme_name,
tx,
uid,
gid,
&full_path,
node,
&mut resolve_nodes,
)
})?;
return self.xopen(&resolved, flags, ctx);
} else if !node.data().is_symlink() && flags & O_SYMLINK == O_SYMLINK {
return Err(Error::new(EINVAL));
} else {
let node_ptr = node.ptr();
if flags & O_DIRECTORY == O_DIRECTORY {
// println!("{:X} & {:X}: ENOTDIR {}", flags, O_DIRECTORY, path);
return Err(Error::new(ENOTDIR));
}
if (flags & O_ACCMODE == O_RDONLY || flags & O_ACCMODE == O_RDWR)
&& !node.data().permission(uid, gid, Node::MODE_READ)
{
// println!("file not readable {:o}", node.data().mode);
return Err(Error::new(EACCES));
}
if (flags & O_ACCMODE == O_WRONLY || flags & O_ACCMODE == O_RDWR)
&& !node.data().permission(uid, gid, Node::MODE_WRITE)
{
// println!("file not writable {:o}", node.data().mode);
return Err(Error::new(EACCES));
}
if flags & O_TRUNC == O_TRUNC {
if !node.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("file not writable {:o}", node.data().mode);
return Err(Error::new(EACCES));
}
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
self.fs.tx(|tx| {
tx.truncate_node(node_ptr, 0, mtime.as_secs(), mtime.subsec_nanos())
})?;
}
Box::new(FileResource::new(
path.to_string(),
parent_ptr_opt,
node_ptr,
flags,
uid,
))
}
}
None => {
if flags & O_CREAT == O_CREAT {
let mut last_part = String::new();
for part in path.split('/') {
if !part.is_empty() {
last_part = part.to_string();
}
}
if !last_part.is_empty() {
if let Some((parent, _parent_name)) = nodes.last() {
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
let dir = flags & O_DIRECTORY == O_DIRECTORY;
let mode_type = if dir {
Node::MODE_DIR
} else if flags & O_SYMLINK == O_SYMLINK {
Node::MODE_SYMLINK
} else {
Node::MODE_FILE
};
let node_ptr = self.fs.tx(|tx| {
let ctime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let mut node = tx.create_node(
parent.ptr(),
&last_part,
mode_type | (flags as u16 & Node::MODE_PERM),
ctime.as_secs(),
ctime.subsec_nanos(),
)?;
let node_ptr = node.ptr();
if node.data().uid() != uid || node.data().gid() != gid {
node.data_mut().set_uid(uid);
node.data_mut().set_gid(gid);
tx.sync_tree(node)?;
}
Ok(node_ptr)
})?;
if dir {
Box::new(DirResource::new(
path.to_string(),
parent_ptr_opt,
node_ptr,
None,
uid,
))
} else {
Box::new(FileResource::new(
path.to_string(),
parent_ptr_opt,
node_ptr,
flags,
uid,
))
}
} else {
return Err(Error::new(EPERM));
}
} else {
return Err(Error::new(EPERM));
}
} else {
return Err(Error::new(ENOENT));
}
}
};
self.fmap
.entry(resource.node_ptr().id())
.or_insert_with(Default::default)
.open_fds += 1;
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
self.files.insert(id, resource);
Ok(OpenResult::ThisScheme {
number: id,
flags: NewFdFlags::POSITIONED,
})
}
fn rmdir(&mut self, url: &str, uid: u32, gid: u32) -> Result<usize> {
let path = url.trim_matches('/');
// println!("Rmdir '{}'", path);
let scheme_name = &self.name;
self.fs.tx(|tx| {
let mut nodes = Vec::new();
let Some((child, child_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes)?
else {
return Err(Error::new(ENOENT));
};
let Some((parent, _parent_name)) = nodes.last() else {
return Err(Error::new(EPERM));
};
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
if child.data().is_dir() {
if !child.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
tx.remove_node(parent.ptr(), &child_name, Node::MODE_DIR)
.and(Ok(0))
} else {
Err(Error::new(ENOTDIR))
}
})
}
fn unlink(&mut self, url: &str, uid: u32, gid: u32) -> Result<usize> {
let path = url.trim_matches('/');
// println!("Unlink '{}'", path);
let scheme_name = &self.name;
self.fs.tx(|tx| {
let mut nodes = Vec::new();
let Some((child, child_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes)?
else {
return Err(Error::new(ENOENT));
};
let Some((parent, _parent_name)) = nodes.last() else {
return Err(Error::new(EPERM));
};
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
if !child.data().is_dir() {
if child.data().uid() != uid && uid != 0 {
// println!("file not owned by current user {}", parent.1.uid);
return Err(Error::new(EACCES));
}
if child.data().is_symlink() {
tx.remove_node(parent.ptr(), &child_name, Node::MODE_SYMLINK)
.and(Ok(0))
} else {
tx.remove_node(parent.ptr(), &child_name, Node::MODE_FILE)
.and(Ok(0))
}
} else {
Err(Error::new(EISDIR))
}
})
}
/* Resource operations */
#[allow(unused_variables)]
fn dup(&mut self, old_id: usize, buf: &[u8]) -> Result<usize> {
// println!("Dup {}", old_id);
if !buf.is_empty() {
return Err(Error::new(EINVAL));
}
let resource = if let Some(old_resource) = self.files.get(&old_id) {
old_resource.dup()?
} else {
return Err(Error::new(EBADF));
};
self.fmap
.get_mut(&resource.node_ptr().id())
.ok_or(Error::new(EBADFD))?
.open_fds += 1;
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
self.files.insert(id, resource);
Ok(id)
}
fn read(&mut self, id: usize, buf: &mut [u8], offset: u64, _fcntl_flags: u32) -> Result<usize> {
// println!("Read {}, {:X} {}", id, buf.as_ptr() as usize, buf.len());
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.read(buf, offset, tx))
}
fn write(&mut self, id: usize, buf: &[u8], offset: u64, _fcntl_flags: u32) -> Result<usize> {
// println!("Write {}, {:X} {}", id, buf.as_ptr() as usize, buf.len());
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.write(buf, offset, tx))
}
fn fsize(&mut self, id: usize) -> Result<u64> {
// println!("Seek {}, {} {}", id, pos, whence);
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.fsize(tx))
}
fn fchmod(&mut self, id: usize, mode: u16) -> Result<usize> {
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.fchmod(mode, tx))
} else {
Err(Error::new(EBADF))
}
}
fn fchown(&mut self, id: usize, uid: u32, gid: u32) -> Result<usize> {
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.fchown(uid, gid, tx))
} else {
Err(Error::new(EBADF))
}
}
fn fcntl(&mut self, id: usize, cmd: usize, arg: usize) -> Result<usize> {
if let Some(file) = self.files.get_mut(&id) {
file.fcntl(cmd, arg)
} else {
Err(Error::new(EBADF))
}
}
fn fevent(&mut self, id: usize, _flags: EventFlags) -> Result<EventFlags> {
if let Some(_file) = self.files.get(&id) {
// EPERM is returned for files that are always readable or writable
Err(Error::new(EPERM))
} else {
Err(Error::new(EBADF))
}
}
fn fpath(&mut self, id: usize, buf: &mut [u8]) -> Result<usize> {
// println!("Fpath {}, {:X} {}", id, buf.as_ptr() as usize, buf.len());
if let Some(file) = self.files.get(&id) {
let name = self.name.as_bytes();
let mut i = 0;
while i < buf.len() && i < name.len() {
buf[i] = name[i];
i += 1;
}
if i < buf.len() {
buf[i] = b':';
i += 1;
}
if i < buf.len() {
buf[i] = b'/';
i += 1;
}
let path = file.path().as_bytes();
let mut j = 0;
while i < buf.len() && j < path.len() {
buf[i] = path[j];
i += 1;
j += 1;
}
Ok(i)
} else {
Err(Error::new(EBADF))
}
}
//TODO: this function has too much code, try to simplify it
fn frename(&mut self, id: usize, url: &str, uid: u32, gid: u32) -> Result<usize> {
let new_path = url.trim_matches('/');
// println!("Frename {}, {} from {}, {}", id, new_path, uid, gid);
if let Some(file) = self.files.get_mut(&id) {
//TODO: Check for EINVAL
// The new pathname contained a path prefix of the old, or, more generally,
// an attempt was made to make a directory a subdirectory of itself.
let mut old_name = String::new();
for part in file.path().split('/') {
if !part.is_empty() {
old_name = part.to_string();
}
}
if old_name.is_empty() {
return Err(Error::new(EPERM));
}
let mut new_name = String::new();
for part in new_path.split('/') {
if !part.is_empty() {
new_name = part.to_string();
}
}
if new_name.is_empty() {
return Err(Error::new(EPERM));
}
let scheme_name = &self.name;
self.fs.tx(|tx| {
let orig_parent_ptr = match file.parent_ptr_opt() {
Some(some) => some,
None => {
// println!("orig is root");
return Err(Error::new(EBUSY));
}
};
let orig_node = tx.read_tree(file.node_ptr())?;
if !orig_node.data().owner(uid) {
// println!("orig_node not owned by caller {}", uid);
return Err(Error::new(EACCES));
}
let mut new_nodes = Vec::new();
let new_node_opt =
Self::path_nodes(scheme_name, tx, new_path, uid, gid, &mut new_nodes)?;
if let Some((ref new_parent, _)) = new_nodes.last() {
if !new_parent.data().owner(uid) {
// println!("new_parent not owned by caller {}", uid);
return Err(Error::new(EACCES));
}
if let Some((ref new_node, _)) = new_node_opt {
if !new_node.data().owner(uid) {
// println!("new dir not owned by caller {}", uid);
return Err(Error::new(EACCES));
}
if new_node.data().is_dir() {
if !orig_node.data().is_dir() {
// println!("orig_node is file, new is dir");
return Err(Error::new(EACCES));
}
let mut children = Vec::new();
tx.child_nodes(new_node.ptr(), &mut children)?;
if !children.is_empty() {
// println!("new dir not empty");
return Err(Error::new(ENOTEMPTY));
}
} else {
if orig_node.data().is_dir() {
// println!("orig_node is dir, new is file");
return Err(Error::new(ENOTDIR));
}
}
}
tx.rename_node(orig_parent_ptr, &old_name, new_parent.ptr(), &new_name)?;
file.set_path(new_path);
Ok(0)
} else {
Err(Error::new(EPERM))
}
})
} else {
Err(Error::new(EBADF))
}
}
fn fstat(&mut self, id: usize, stat: &mut Stat) -> Result<usize> {
// println!("Fstat {}, {:X}", id, stat as *mut Stat as usize);
if let Some(file) = self.files.get(&id) {
self.fs.tx(|tx| file.stat(stat, tx))
} else {
Err(Error::new(EBADF))
}
}
fn fstatvfs(&mut self, id: usize, stat: &mut StatVfs) -> Result<usize> {
if let Some(_file) = self.files.get(&id) {
stat.f_bsize = BLOCK_SIZE as u32;
stat.f_blocks = self.fs.header.size() / (stat.f_bsize as u64);
stat.f_bfree = self.fs.allocator().free();
stat.f_bavail = stat.f_bfree;
Ok(0)
} else {
Err(Error::new(EBADF))
}
}
fn fsync(&mut self, id: usize) -> Result<usize> {
// println!("Fsync {}", id);
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
let fmaps = &mut self.fmap;
self.fs.tx(|tx| file.sync(fmaps, tx))
}
fn ftruncate(&mut self, id: usize, len: usize) -> Result<usize> {
// println!("Ftruncate {}, {}", id, len);
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.truncate(len, tx))
} else {
Err(Error::new(EBADF))
}
}
fn futimens(&mut self, id: usize, times: &[TimeSpec]) -> Result<usize> {
// println!("Futimens {}, {}", id, times.len());
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.utimens(times, tx))
} else {
Err(Error::new(EBADF))
}
}
fn mmap_prep(&mut self, id: usize, offset: u64, size: usize, flags: MapFlags) -> Result<usize> {
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
let fmaps = &mut self.fmap;
self.fs.tx(|tx| file.fmap(fmaps, flags, size, offset, tx))
}
#[allow(unused_variables)]
fn munmap(&mut self, id: usize, offset: u64, size: usize, flags: MunmapFlags) -> Result<usize> {
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
let fmaps = &mut self.fmap;
self.fs.tx(|tx| file.funmap(fmaps, offset, size, tx))
}
fn close(&mut self, id: usize) -> Result<usize> {
// println!("Close {}", id);
let file = self.files.remove(&id).ok_or(Error::new(EBADF))?;
let file_info = self
.fmap
.get_mut(&file.node_ptr().id())
.ok_or(Error::new(EBADFD))?;
file_info.open_fds = file_info
.open_fds
.checked_sub(1)
.expect("open_fds not tracked correctly");
// TODO: If open_fds reaches zero and there are no hardlinks (directory entries) to any
// particular inode, remove that inode here.
Ok(0)
}
}
use core::{fmt, mem, ops, slice};
use endian_num::Le;
use crate::{BlockLevel, BlockList, BlockPtr, BlockTrait, RecordRaw, BLOCK_SIZE, RECORD_LEVEL};
/// An index into a [`Node`]'s block table.
pub enum NodeLevel {
L0(usize),
L1(usize, usize),
L2(usize, usize, usize),
L3(usize, usize, usize, usize),
L4(usize, usize, usize, usize, usize),
}
impl NodeLevel {
// Warning: this uses constant record offsets, make sure to sync with Node
/// Return the [`NodeLevel`] of the record with the given index.
/// - the first 128 are level 0,
/// - the next 64*256 are level 1,
/// - ...and so on.
pub fn new(mut record_offset: u64) -> Option<Self> {
// 1 << 8 = 256, this is the number of entries in a BlockList
const SHIFT: u64 = 8;
const NUM: u64 = 1 << SHIFT;
const MASK: u64 = NUM - 1;
const L0: u64 = 128;
if record_offset < L0 {
return Some(Self::L0((record_offset & MASK) as usize));
} else {
record_offset -= L0;
}
const L1: u64 = 64 * NUM;
if record_offset < L1 {
return Some(Self::L1(
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
record_offset -= L1;
}
const L2: u64 = 32 * NUM * NUM;
if record_offset < L2 {
return Some(Self::L2(
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
record_offset -= L2;
}
const L3: u64 = 16 * NUM * NUM * NUM;
if record_offset < L3 {
return Some(Self::L3(
((record_offset >> (3 * SHIFT)) & MASK) as usize,
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
record_offset -= L3;
}
const L4: u64 = 12 * NUM * NUM * NUM * NUM;
if record_offset < L4 {
Some(Self::L4(
((record_offset >> (4 * SHIFT)) & MASK) as usize,
((record_offset >> (3 * SHIFT)) & MASK) as usize,
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
))
} else {
None
}
}
}
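// An illustrative sanity check of the offset arithmetic above (added for
// exposition, not part of the upstream source): the first 128 records map to
// the level-0 table, record 128 is entry (0, 0) of the level-1 tables, and
// record 128 + 64 * 256 is entry (0, 0, 0) of the level-2 tables.
#[test]
fn node_level_offset_examples() {
    assert!(matches!(NodeLevel::new(0), Some(NodeLevel::L0(0))));
    assert!(matches!(NodeLevel::new(127), Some(NodeLevel::L0(127))));
    assert!(matches!(NodeLevel::new(128), Some(NodeLevel::L1(0, 0))));
    assert!(matches!(
        NodeLevel::new(128 + 64 * 256),
        Some(NodeLevel::L2(0, 0, 0))
    ));
}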
type BlockListL1 = BlockList<RecordRaw>;
type BlockListL2 = BlockList<BlockListL1>;
type BlockListL3 = BlockList<BlockListL2>;
type BlockListL4 = BlockList<BlockListL3>;
/// A file/folder node
#[repr(C, packed)]
pub struct Node {
/// This node's type & permissions.
/// - the top four bits are the node type (`MODE_TYPE`)
/// - the remaining twelve bits are the permission bits (`MODE_PERM`),
///   split into user/group/other groups as in Unix `st_mode`
pub mode: Le<u16>,
/// The uid that owns this file
pub uid: Le<u32>,
/// The gid that owns this file
pub gid: Le<u32>,
/// The number of links to this file
/// (directory entries, symlinks, etc)
pub links: Le<u32>,
/// The length of this file, in bytes
pub size: Le<u64>,
pub ctime: Le<u64>,
pub ctime_nsec: Le<u32>,
pub mtime: Le<u64>,
pub mtime_nsec: Le<u32>,
pub atime: Le<u64>,
pub atime_nsec: Le<u32>,
pub record_level: Le<u32>,
pub padding: [u8; BLOCK_SIZE as usize - 4094],
/// The first 128 blocks of this file.
///
/// Total size: 128 * RECORD_SIZE (16 MiB, 128 KiB each)
pub level0: [BlockPtr<RecordRaw>; 128],
/// The next 64 * 256 blocks of this file,
/// stored behind 64 level one tables.
///
/// Total size: 64 * 256 * RECORD_SIZE (2 GiB, 32 MiB each)
pub level1: [BlockPtr<BlockListL1>; 64],
/// The next 32 * 256 * 256 blocks of this file,
/// stored behind 32 level two tables.
/// Each level two table points to 256 level one tables.
///
/// Total size: 32 * 256 * 256 * RECORD_SIZE (256 GiB, 8 GiB each)
pub level2: [BlockPtr<BlockListL2>; 32],
/// The next 16 * 256 * 256 * 256 blocks of this file,
/// stored behind 16 level three tables.
///
/// Total size: 16 * 256 * 256 * 256 * RECORD_SIZE (32 TiB, 2 TiB each)
pub level3: [BlockPtr<BlockListL3>; 16],
/// The next 12 * 256 * 256 * 256 * 256 blocks of this file,
/// stored behind 12 level four tables.
///
/// Total size: 12 * 256 * 256 * 256 * 256 * RECORD_SIZE (6 PiB, 512 TiB each)
pub level4: [BlockPtr<BlockListL4>; 12],
}
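// Derived note: summing the level tables above, a node addresses
// 128 + 64*256 + 32*256^2 + 16*256^3 + 12*256^4 records; with 128 KiB
// records that is 16 MiB + 2 GiB + 256 GiB + 32 TiB + 6 PiB, or a little
// over 6 PiB per file.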
unsafe impl BlockTrait for Node {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 == 0 {
Some(Self::default())
} else {
None
}
}
}
impl Default for Node {
fn default() -> Self {
Self {
mode: 0.into(),
uid: 0.into(),
gid: 0.into(),
links: 0.into(),
size: 0.into(),
ctime: 0.into(),
ctime_nsec: 0.into(),
mtime: 0.into(),
mtime_nsec: 0.into(),
atime: 0.into(),
atime_nsec: 0.into(),
record_level: 0.into(),
padding: [0; BLOCK_SIZE as usize - 4094],
level0: [BlockPtr::default(); 128],
level1: [BlockPtr::default(); 64],
level2: [BlockPtr::default(); 32],
level3: [BlockPtr::default(); 16],
level4: [BlockPtr::default(); 12],
}
}
}
impl Node {
/// Mask for the node type bits
pub const MODE_TYPE: u16 = 0xF000;
pub const MODE_FILE: u16 = 0x8000;
pub const MODE_DIR: u16 = 0x4000;
pub const MODE_SYMLINK: u16 = 0xA000;
/// Mask for node permission bits
pub const MODE_PERM: u16 = 0x0FFF;
pub const MODE_EXEC: u16 = 0o1;
pub const MODE_WRITE: u16 = 0o2;
pub const MODE_READ: u16 = 0o4;
/// Create a new, empty node with the given metadata
pub fn new(mode: u16, uid: u32, gid: u32, ctime: u64, ctime_nsec: u32) -> Self {
Self {
mode: mode.into(),
uid: uid.into(),
gid: gid.into(),
links: 0.into(),
ctime: ctime.into(),
ctime_nsec: ctime_nsec.into(),
mtime: ctime.into(),
mtime_nsec: ctime_nsec.into(),
atime: ctime.into(),
atime_nsec: ctime_nsec.into(),
record_level: if mode & Self::MODE_TYPE == Self::MODE_FILE {
// Files take on record level
RECORD_LEVEL as u32
} else {
// Folders do not
0
}
.into(),
..Default::default()
}
}
/// This node's type & permissions.
/// - the top four bits are the node type (`MODE_TYPE`)
/// - the remaining twelve bits are the permission bits (`MODE_PERM`);
///   e.g. a regular file with permissions 0o644 stores `MODE_FILE | 0o644`
pub fn mode(&self) -> u16 {
self.mode.to_ne()
}
/// The uid that owns this file
pub fn uid(&self) -> u32 {
self.uid.to_ne()
}
/// The gid that owns this file
pub fn gid(&self) -> u32 {
self.gid.to_ne()
}
/// The number of links to this file
/// (directory entries, symlinks, etc)
pub fn links(&self) -> u32 {
self.links.to_ne()
}
/// The length of this file, in bytes.
pub fn size(&self) -> u64 {
self.size.to_ne()
}
pub fn ctime(&self) -> (u64, u32) {
(self.ctime.to_ne(), self.ctime_nsec.to_ne())
}
pub fn mtime(&self) -> (u64, u32) {
(self.mtime.to_ne(), self.mtime_nsec.to_ne())
}
pub fn atime(&self) -> (u64, u32) {
(self.atime.to_ne(), self.atime_nsec.to_ne())
}
pub fn record_level(&self) -> BlockLevel {
BlockLevel(self.record_level.to_ne() as usize)
}
pub fn set_mode(&mut self, mode: u16) {
self.mode = mode.into();
}
pub fn set_uid(&mut self, uid: u32) {
self.uid = uid.into();
}
pub fn set_gid(&mut self, gid: u32) {
self.gid = gid.into();
}
pub fn set_links(&mut self, links: u32) {
self.links = links.into();
}
pub fn set_size(&mut self, size: u64) {
self.size = size.into();
}
pub fn set_mtime(&mut self, mtime: u64, mtime_nsec: u32) {
self.mtime = mtime.into();
self.mtime_nsec = mtime_nsec.into();
}
pub fn set_atime(&mut self, atime: u64, atime_nsec: u32) {
self.atime = atime.into();
self.atime_nsec = atime_nsec.into();
}
pub fn is_dir(&self) -> bool {
self.mode() & Self::MODE_TYPE == Self::MODE_DIR
}
pub fn is_file(&self) -> bool {
self.mode() & Self::MODE_TYPE == Self::MODE_FILE
}
pub fn is_symlink(&self) -> bool {
self.mode() & Self::MODE_TYPE == Self::MODE_SYMLINK
}
/// Tests whether `uid` owns this file: true when `uid` is 0 (root) or equals
/// the UID stored in this node's metadata.
pub fn owner(&self, uid: u32) -> bool {
uid == 0 || self.uid() == uid
}
/// Tests whether the user given by `uid` and `gid` may perform operation `op`
/// on this node, where `op` is MODE_EXEC, MODE_READ, MODE_WRITE, or a
/// combination of them.
pub fn permission(&self, uid: u32, gid: u32, op: u16) -> bool {
let mut perm = self.mode() & 0o7;
if self.uid() == uid {
// If self.mode is 101100110, >> 6 would be 000000101
// 0o7 is octal for 111, or, when expanded to 9 digits is 000000111
perm |= (self.mode() >> 6) & 0o7;
// Since we erased the GID and OTHER bits when >>6'ing, |= will keep those bits in place.
}
if self.gid() == gid || gid == 0 {
perm |= (self.mode() >> 3) & 0o7;
}
if uid == 0 {
//set the `other` bits to 111
perm |= 0o7;
}
perm & op == op
}
}
impl fmt::Debug for Node {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mode = self.mode;
let uid = self.uid;
let gid = self.gid;
let links = self.links;
let size = self.size;
let ctime = self.ctime;
let ctime_nsec = self.ctime_nsec;
let mtime = self.mtime;
let mtime_nsec = self.mtime_nsec;
let atime = self.atime;
let atime_nsec = self.atime_nsec;
f.debug_struct("Node")
.field("mode", &self.mode)
.field("uid", &self.uid)
.field("gid", &self.gid)
.field("ctime", &self.ctime)
.field("ctime_nsec", &self.ctime_nsec)
.field("mtime", &self.mtime)
.field("mtime_nsec", &self.mtime_nsec)
.field("name", &self.name())
.field("next", &self.next)
.field("extents", &extents)
.field("mode", &mode)
.field("uid", &uid)
.field("gid", &gid)
.field("links", &links)
.field("size", &size)
.field("ctime", &ctime)
.field("ctime_nsec", &ctime_nsec)
.field("mtime", &mtime)
.field("mtime_nsec", &mtime_nsec)
.field("atime", &atime)
.field("atime_nsec", &atime_nsec)
//TODO: level0/1/2/3
.finish()
}
}
impl ops::Deref for Node {
type Target = [u8];
fn deref(&self) -> &[u8] {
unsafe {
slice::from_raw_parts(self as *const Node as *const u8, mem::size_of::<Node>())
as &[u8]
}
}
}
impl ops::DerefMut for Node {
fn deref_mut(&mut self) -> &mut [u8] {
unsafe {
slice::from_raw_parts_mut(self as *mut Node as *mut u8, mem::size_of::<Node>())
as &mut [u8]
}
}
}
#[test]
fn node_size_test() {
assert_eq!(mem::size_of::<Node>(), crate::BLOCK_SIZE as usize);
}
#[cfg(kani)]
#[kani::proof]
fn check_node_level() {
let offset = kani::any();
NodeLevel::new(offset);
}
#[cfg(kani)]
#[kani::proof]
fn check_node_perms() {
let mode = 0o750;
let uid = kani::any();
let gid = kani::any();
let ctime = kani::any();
let ctime_nsec = kani::any();
let node = Node::new(mode, uid, gid, ctime, ctime_nsec);
let root_uid = 0;
let root_gid = 0;
let other_uid = kani::any();
kani::assume(other_uid != uid);
kani::assume(other_uid != root_uid);
let other_gid = kani::any();
kani::assume(other_gid != gid);
kani::assume(other_gid != root_gid);
assert!(node.owner(uid));
assert!(node.permission(uid, gid, 0o7));
assert!(node.permission(uid, gid, 0o5));
assert!(node.permission(uid, other_gid, 0o7));
assert!(node.permission(uid, other_gid, 0o5));
assert!(!node.permission(other_uid, gid, 0o7));
assert!(node.permission(other_uid, gid, 0o5));
assert!(node.owner(root_uid));
assert!(node.permission(root_uid, root_gid, 0o7));
assert!(node.permission(root_uid, root_gid, 0o5));
assert!(node.permission(root_uid, other_gid, 0o7));
assert!(node.permission(root_uid, other_gid, 0o5));
assert!(!node.permission(other_uid, root_gid, 0o7));
assert!(node.permission(other_uid, root_gid, 0o5));
assert!(!node.owner(other_uid));
assert!(!node.permission(other_uid, other_gid, 0o7));
assert!(!node.permission(other_uid, other_gid, 0o5));
}
use alloc::{boxed::Box, vec};
use core::ops;
use crate::{BlockLevel, BlockTrait, RECORD_LEVEL};
//TODO: this is a box to prevent stack overflows
pub struct RecordRaw(Box<[u8]>);
unsafe impl BlockTrait for RecordRaw {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 <= RECORD_LEVEL {
Some(Self(vec![0; level.bytes() as usize].into_boxed_slice()))
} else {
None
}
}
}
impl Clone for RecordRaw {
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
impl ops::Deref for RecordRaw {
type Target = [u8];
fn deref(&self) -> &[u8] {
&self.0
}
}
impl ops::DerefMut for RecordRaw {
fn deref_mut(&mut self) -> &mut [u8] {
&mut self.0
}
}
#[test]
fn record_raw_size_test() {
for level_i in 0..RECORD_LEVEL {
let level = BlockLevel(level_i);
assert_eq!(
RecordRaw::empty(level).unwrap().len(),
level.bytes() as usize
);
}
}
use crate::{unmount_path, DiskSparse, FileSystem, Node, TreePtr, ALLOC_GC_THRESHOLD};
use std::path::Path;
use std::process::Command;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::Relaxed;
use std::{fs, thread, time};
static IMAGE_SEQ: AtomicUsize = AtomicUsize::new(0);
fn with_redoxfs<T, F>(callback: F) -> T
where
T: Send + Sync + 'static,
F: FnOnce(FileSystem<DiskSparse>) -> T + Send + Sync + 'static,
{
let disk_path = format!("image{}.bin", IMAGE_SEQ.fetch_add(1, Relaxed));
let res = {
let disk = DiskSparse::create(dbg!(&disk_path), 1024 * 1024 * 1024).unwrap();
let ctime = dbg!(time::SystemTime::now().duration_since(time::UNIX_EPOCH)).unwrap();
let fs = FileSystem::create(disk, None, ctime.as_secs(), ctime.subsec_nanos()).unwrap();
callback(fs)
};
dbg!(fs::remove_file(dbg!(disk_path))).unwrap();
res
}
fn with_mounted<T, F>(callback: F) -> T
where
T: Send + Sync + 'static,
F: FnOnce(&Path) -> T + Send + Sync + 'static,
{
let mount_path_o = format!("image{}", IMAGE_SEQ.fetch_add(1, Relaxed));
let mount_path = mount_path_o.clone();
let res = with_redoxfs(move |fs| {
if cfg!(not(target_os = "redox")) {
if !Path::new(&mount_path).exists() {
dbg!(fs::create_dir(dbg!(&mount_path))).unwrap();
}
}
let join_handle = crate::mount(fs, dbg!(mount_path), move |real_path| {
let real_path = real_path.to_owned();
thread::spawn(move || {
let res = callback(&real_path);
let real_path = real_path.to_str().unwrap();
if cfg!(target_os = "redox") {
dbg!(fs::remove_file(dbg!(format!(":{}", real_path)))).unwrap();
} else {
if !dbg!(Command::new("sync").status()).unwrap().success() {
panic!("sync failed");
}
if unmount_path(real_path).is_err() {
panic!("umount failed");
}
}
res
})
})
.unwrap();
join_handle.join().unwrap()
});
if cfg!(not(target_os = "redox")) {
dbg!(fs::remove_dir(dbg!(mount_path_o))).unwrap();
}
res
}
#[test]
fn simple() {
with_mounted(|path| {
dbg!(fs::create_dir(&path.join("test"))).unwrap();
})
}
#[cfg(target_os = "redox")]
#[test]
fn mmap() {
use syscall;
//TODO
with_mounted(|path| {
use std::slice;
let path = dbg!(path.join("test"));
let mmap_inner = |write: bool| {
let fd = dbg!(libredox::call::open(
path.to_str().unwrap(),
libredox::flag::O_CREAT | libredox::flag::O_RDWR | libredox::flag::O_CLOEXEC,
0,
))
.unwrap();
let map = unsafe {
slice::from_raw_parts_mut(
dbg!(libredox::call::mmap(libredox::call::MmapArgs {
fd,
offset: 0,
length: 128,
prot: libredox::flag::PROT_READ | libredox::flag::PROT_WRITE,
flags: libredox::flag::MAP_SHARED,
addr: core::ptr::null_mut(),
}))
.unwrap() as *mut u8,
128,
)
};
// Maps should be available after closing
assert_eq!(dbg!(libredox::call::close(fd)), Ok(()));
for i in 0..128 {
if write {
map[i as usize] = i;
}
assert_eq!(map[i as usize], i);
}
//TODO: add msync
unsafe {
assert_eq!(
dbg!(libredox::call::munmap(map.as_mut_ptr().cast(), map.len())),
Ok(())
);
}
};
mmap_inner(true);
mmap_inner(false);
})
}
#[test]
fn create_remove_should_not_increase_size() {
with_redoxfs(|mut fs| {
let initially_free = fs.allocator().free();
let tree_ptr = TreePtr::<Node>::root();
let name = "test";
let _ = fs
.tx(|tx| {
tx.create_node(tree_ptr, name, Node::MODE_FILE | 0o644, 1, 0)?;
tx.remove_node(tree_ptr, name, Node::MODE_FILE)
})
.unwrap();
assert_eq!(fs.allocator().free(), initially_free);
});
}
#[test]
fn many_create_remove_should_not_increase_size() {
with_redoxfs(|mut fs| {
let initially_free = fs.allocator().free();
let tree_ptr = TreePtr::<Node>::root();
let name = "test";
// Create and remove files enough times to cross an ALLOC_GC_THRESHOLD
// boundary, proving both that deleted files don't retain space within the
// node tree and that the allocator GC reclaims the logged space
let start = fs.header.generation.to_ne();
let end = start + ALLOC_GC_THRESHOLD;
let end = end - (end % ALLOC_GC_THRESHOLD) + 1 + ALLOC_GC_THRESHOLD;
for i in start..end {
let _ = fs
.tx(|tx| {
tx.create_node(
tree_ptr,
&format!("{}{}", name, i),
Node::MODE_FILE | 0o644,
1,
0,
)?;
tx.remove_node(tree_ptr, &format!("{}{}", name, i), Node::MODE_FILE)
})
.unwrap();
}
// Any value greater than 0 indicates a storage leak
let diff = initially_free - fs.allocator().free();
assert_eq!(diff, 0);
});
}
use alloc::{
boxed::Box,
collections::{BTreeMap, VecDeque},
vec::Vec,
};
use core::{
cmp::min,
mem,
ops::{Deref, DerefMut},
};
use syscall::error::{
Error, Result, EEXIST, EINVAL, EIO, EISDIR, ENOENT, ENOSPC, ENOTDIR, ENOTEMPTY, ERANGE,
};
use crate::{
AllocEntry, AllocList, Allocator, BlockAddr, BlockData, BlockLevel, BlockPtr, BlockTrait,
DirEntry, DirList, Disk, FileSystem, Header, Node, NodeLevel, RecordRaw, TreeData, TreePtr,
ALLOC_GC_THRESHOLD, ALLOC_LIST_ENTRIES, DIR_ENTRY_MAX_LENGTH, HEADER_RING,
};
pub struct Transaction<'a, D: Disk> {
fs: &'a mut FileSystem<D>,
//TODO: make private
pub header: Header,
//TODO: make private
pub header_changed: bool,
allocator: Allocator,
allocator_log: VecDeque<AllocEntry>,
deallocate: Vec<BlockAddr>,
write_cache: BTreeMap<BlockAddr, Box<[u8]>>,
}
impl<'a, D: Disk> Transaction<'a, D> {
pub(crate) fn new(fs: &'a mut FileSystem<D>) -> Self {
let header = fs.header;
let allocator = fs.allocator.clone();
Self {
fs,
header,
header_changed: false,
allocator,
allocator_log: VecDeque::new(),
deallocate: Vec::new(),
write_cache: BTreeMap::new(),
}
}
pub fn commit(mut self, squash: bool) -> Result<()> {
self.sync(squash)?;
self.fs.header = self.header;
self.fs.allocator = self.allocator;
Ok(())
}
//
// MARK: block operations
//
/// Allocate a new block of size `level`, returning its address.
/// - returns `Err(ENOSPC)` if a block of this size could not be allocated.
/// - unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn allocate(&mut self, level: BlockLevel) -> Result<BlockAddr> {
match self.allocator.allocate(level) {
Some(addr) => {
self.allocator_log.push_back(AllocEntry::allocate(addr));
Ok(addr)
}
None => Err(Error::new(ENOSPC)),
}
}
/// Deallocate the given block.
/// - unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn deallocate(&mut self, addr: BlockAddr) {
//TODO: should we use some sort of not-null abstraction?
assert!(!addr.is_null());
// Remove from write_cache if it is there, since it no longer needs to be written
//TODO: for larger blocks do we need to check for sub-blocks in here?
self.write_cache.remove(&addr);
// Search and remove the last matching entry in allocator_log
let mut found = false;
for i in (0..self.allocator_log.len()).rev() {
let entry = self.allocator_log[i];
if entry.index() == addr.index() && entry.count() == -addr.level().blocks() {
found = true;
self.allocator_log.remove(i);
break;
}
}
if found {
// Deallocate immediately since it is an allocation that was not needed
self.allocator.deallocate(addr);
} else {
// Deallocate later when syncing filesystem, to avoid re-use
self.deallocate.push(addr);
}
}
fn deallocate_block<T: BlockTrait>(&mut self, ptr: BlockPtr<T>) {
if !ptr.is_null() {
unsafe {
self.deallocate(ptr.addr());
}
}
}
/// Drain `self.allocator_log` and `self.deallocate`,
/// updating the [`AllocList`] with the resulting state.
///
/// This method does not write anything to disk,
/// all writes are cached.
///
/// To keep the allocator log from growing excessively, it will
/// periodically be fully rebuilt using the state of `self.allocator`.
/// This rebuild can be forced by setting `force_squash` to `true`.
fn sync_allocator(&mut self, force_squash: bool) -> Result<bool> {
let mut prev_ptr = BlockPtr::default();
let should_gc = self.header.generation() % ALLOC_GC_THRESHOLD == 0
&& self.header.generation() >= ALLOC_GC_THRESHOLD;
if force_squash || should_gc {
// Clear and rebuild alloc log
self.allocator_log.clear();
let levels = self.allocator.levels();
for level in (0..levels.len()).rev() {
let count = (1 << level) as i64;
'indexs: for &index in levels[level].iter() {
for entry in self.allocator_log.iter_mut() {
if index + count as u64 == entry.index() {
// New entry is at start of existing entry
*entry = AllocEntry::new(index, count + entry.count());
continue 'indexs;
} else if entry.index() + entry.count() as u64 == index {
// New entry is at end of existing entry
*entry = AllocEntry::new(entry.index(), entry.count() + count);
continue 'indexs;
}
}
self.allocator_log.push_back(AllocEntry::new(index, count));
}
}
// Prepare to deallocate old alloc blocks
let mut alloc_ptr = self.header.alloc;
while !alloc_ptr.is_null() {
let alloc = self.read_block(alloc_ptr)?;
self.deallocate.push(alloc.addr());
alloc_ptr = alloc.data().prev;
}
} else {
// Return if there are no log changes
if self.allocator_log.is_empty() && self.deallocate.is_empty() {
return Ok(false);
}
// Push old alloc block to front of allocator log
//TODO: just skip this if it is already full?
let alloc = self.read_block(self.header.alloc)?;
for i in (0..alloc.data().entries.len()).rev() {
let entry = alloc.data().entries[i];
if !entry.is_null() {
self.allocator_log.push_front(entry);
}
}
// Prepare to deallocate old alloc block
self.deallocate.push(alloc.addr());
// Link to previous alloc block
prev_ptr = alloc.data().prev;
}
// Allocate required blocks, including CoW of current alloc tail
let mut new_blocks = Vec::new();
while new_blocks.len() * ALLOC_LIST_ENTRIES
<= self.allocator_log.len() + self.deallocate.len()
{
new_blocks.push(unsafe { self.allocate(BlockLevel::default())? });
}
// De-allocate old blocks (after allocation to prevent re-use)
//TODO: optimize allocator log in memory
while let Some(addr) = self.deallocate.pop() {
self.allocator.deallocate(addr);
self.allocator_log.push_back(AllocEntry::deallocate(addr));
}
for new_block in new_blocks {
let mut alloc = BlockData::<AllocList>::empty(new_block).unwrap();
alloc.data_mut().prev = prev_ptr;
for entry in alloc.data_mut().entries.iter_mut() {
if let Some(log_entry) = self.allocator_log.pop_front() {
*entry = log_entry;
} else {
break;
}
}
prev_ptr = unsafe { self.write_block(alloc)? };
}
self.header.alloc = prev_ptr;
self.header_changed = true;
Ok(true)
}
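// The resulting on-disk layout is a singly linked list of [`AllocList`]
// blocks: `header.alloc` points at the newest block and each block's `prev`
// pointer chains back to older ones, so replaying every entry in order
// reproduces the state of `self.allocator`.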
/// Write all changes cached in this [`Transaction`] to disk.
pub fn sync(&mut self, force_squash: bool) -> Result<bool> {
// Make sure alloc is synced
self.sync_allocator(force_squash)?;
// Write all items in write cache
for (addr, raw) in self.write_cache.iter_mut() {
// sync_allocator must have changed the alloc block pointer
// if we have any blocks to write
assert!(self.header_changed);
self.fs.encrypt(raw);
let count = unsafe { self.fs.disk.write_at(self.fs.block + addr.index(), raw)? };
if count != raw.len() {
// Wrote wrong number of bytes
#[cfg(feature = "log")]
log::error!("SYNC WRITE_CACHE: WRONG NUMBER OF BYTES");
return Err(Error::new(EIO));
}
}
self.write_cache.clear();
// Do nothing if there are no changes to write.
//
// This only happens if `self.write_cache` was empty,
// and the fs header wasn't changed by another operation.
if !self.header_changed {
return Ok(false);
}
// Update header to next generation
let gen = self.header.update(self.fs.aes_opt.as_ref());
let gen_block = gen % HEADER_RING;
// Write header
let count = unsafe {
self.fs
.disk
.write_at(self.fs.block + gen_block, &self.header)?
};
if count != mem::size_of_val(&self.header) {
// Wrote wrong number of bytes
#[cfg(feature = "log")]
log::error!("SYNC: WRONG NUMBER OF BYTES");
return Err(Error::new(EIO));
}
self.header_changed = false;
Ok(true)
}
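// Crash-consistency note: the write cache (data, tree, and allocator
// blocks) is flushed before the header, and the header lands in ring slot
// `generation % HEADER_RING`, so an interrupted sync leaves the previous
// generation's header, and everything it references, intact.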
pub fn read_block<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
) -> Result<BlockData<T>> {
if ptr.is_null() {
// Pointer is invalid (should this return None?)
#[cfg(feature = "log")]
log::error!("READ_BLOCK: POINTER IS NULL");
return Err(Error::new(ENOENT));
}
let mut data = match T::empty(ptr.addr().level()) {
Some(some) => some,
None => {
#[cfg(feature = "log")]
log::error!("READ_BLOCK: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
if let Some(raw) = self.write_cache.get(&ptr.addr()) {
data.copy_from_slice(raw);
} else {
let count = unsafe {
self.fs
.disk
.read_at(self.fs.block + ptr.addr().index(), &mut data)?
};
if count != data.len() {
// Read wrong number of bytes
#[cfg(feature = "log")]
log::error!("READ_BLOCK: WRONG NUMBER OF BYTES");
return Err(Error::new(EIO));
}
self.fs.decrypt(&mut data);
}
let block = BlockData::new(ptr.addr(), data);
let block_ptr = block.create_ptr();
if block_ptr.hash() != ptr.hash() {
// Incorrect hash
#[cfg(feature = "log")]
log::error!(
"READ_BLOCK: INCORRECT HASH 0x{:X} != 0x{:X} for block 0x{:X}",
block_ptr.hash(),
ptr.hash(),
ptr.addr().index()
);
return Err(Error::new(EIO));
}
Ok(block)
}
/// Read block data or, if pointer is null, return default block data
///
/// # Safety
/// Unsafe because it can return [`BlockData`] with a null address, which must be swapped for a real address before the block is written
unsafe fn read_block_or_empty<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
) -> Result<BlockData<T>> {
if ptr.is_null() {
match T::empty(ptr.addr().level()) {
Some(empty) => Ok(BlockData::new(BlockAddr::default(), empty)),
None => {
#[cfg(feature = "log")]
log::error!("READ_BLOCK_OR_EMPTY: INVALID BLOCK LEVEL FOR TYPE");
Err(Error::new(ENOENT))
}
}
} else {
self.read_block(ptr)
}
}
unsafe fn read_record<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
level: BlockLevel,
) -> Result<BlockData<T>> {
let record = unsafe { self.read_block_or_empty(ptr)? };
if record.addr().level() >= level {
// Return record if it is larger than or equal to requested level
return Ok(record);
}
// If a larger level was requested,
// create a fake record with the requested level
// and fill it with the data in the original record.
let (_old_addr, old_raw) = unsafe { record.into_parts() };
let mut raw = match T::empty(level) {
Some(empty) => empty,
None => {
#[cfg(feature = "log")]
log::error!("READ_RECORD: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
let len = min(raw.len(), old_raw.len());
raw[..len].copy_from_slice(&old_raw[..len]);
Ok(BlockData::new(BlockAddr::null(level), raw))
}
/// Write block data to a new address, returning new address
pub fn sync_block<T: BlockTrait + Deref<Target = [u8]>>(
&mut self,
mut block: BlockData<T>,
) -> Result<BlockPtr<T>> {
// Swap block to new address
let level = block.addr().level();
let old_addr = block.swap_addr(unsafe { self.allocate(level)? });
// Deallocate old address (this will only take effect after sync_allocator,
// which helps to prevent re-use before a new header is written)
if !old_addr.is_null() {
unsafe {
self.deallocate(old_addr);
}
}
// Write new block
unsafe { self.write_block(block) }
}
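// Example of the CoW flow: read block A, mutate the copy, then call
// `sync_block`. The data is written at a freshly allocated address B and A
// is queued for deallocation, so the old on-disk contents stay readable
// until a new header referencing B has been committed.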
/// Write block data, returning a calculated block pointer
///
/// # Safety
/// Unsafe to encourage CoW semantics
pub(crate) unsafe fn write_block<T: BlockTrait + Deref<Target = [u8]>>(
&mut self,
block: BlockData<T>,
) -> Result<BlockPtr<T>> {
if block.addr().is_null() {
// Pointer is invalid
#[cfg(feature = "log")]
log::error!("WRITE_BLOCK: POINTER IS NULL");
return Err(Error::new(ENOENT));
}
//TODO: do not convert to boxed slice if it already is one
self.write_cache.insert(
block.addr(),
block.data().deref().to_vec().into_boxed_slice(),
);
Ok(block.create_ptr())
}
//
// MARK: tree operations
//
/// Walk the tree and return the contents and address
/// of the data block that `ptr` points to.
fn read_tree_and_addr<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<(TreeData<T>, BlockAddr)> {
if ptr.is_null() {
// ID is invalid (should this return None?)
#[cfg(feature = "log")]
log::error!("READ_TREE: ID IS NULL");
return Err(Error::new(ENOENT));
}
let (i3, i2, i1, i0) = ptr.indexes();
let l3 = self.read_block(self.header.tree)?;
let l2 = self.read_block(l3.data().ptrs[i3])?;
let l1 = self.read_block(l2.data().ptrs[i2])?;
let l0 = self.read_block(l1.data().ptrs[i1])?;
let raw = self.read_block(l0.data().ptrs[i0])?;
//TODO: transmute instead of copy?
let mut data = match T::empty(BlockLevel::default()) {
Some(some) => some,
None => {
#[cfg(feature = "log")]
log::error!("READ_TREE: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
data.copy_from_slice(raw.data());
Ok((TreeData::new(ptr.id(), data), raw.addr()))
}
/// Walk the tree and return the contents of the data block that `ptr` points to.
pub fn read_tree<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<TreeData<T>> {
Ok(self.read_tree_and_addr(ptr)?.0)
}
/// Insert `block_ptr` into the first free slot in the tree,
/// returning a pointer to that slot.
pub fn insert_tree<T: Deref<Target = [u8]>>(
&mut self,
block_ptr: BlockPtr<T>,
) -> Result<TreePtr<T>> {
// TODO: improve performance, reduce writes
// Remember that once a free slot is found at the lowest level, every level
// above it is synced on the way back up, so a write could be saved by not
// writing each level as it is allocated.
unsafe {
let mut l3 = self.read_block(self.header.tree)?;
for i3 in 0..l3.data().ptrs.len() {
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
for i2 in 0..l2.data().ptrs.len() {
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
for i1 in 0..l1.data().ptrs.len() {
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
for i0 in 0..l0.data().ptrs.len() {
let pn = l0.data().ptrs[i0];
// Skip if already in use
if !pn.is_null() {
continue;
}
let tree_ptr = TreePtr::from_indexes((i3, i2, i1, i0));
// Skip if this is a reserved node (null)
if tree_ptr.is_null() {
continue;
}
// TODO: do we need to write all of these?
// Write updates to newly allocated blocks
l0.data_mut().ptrs[i0] = block_ptr.cast();
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
self.header.tree = self.sync_block(l3)?;
self.header_changed = true;
return Ok(tree_ptr);
}
}
}
}
}
Err(Error::new(ENOSPC))
}
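// Capacity sketch: four levels of 256 pointers each give 256^4 = 2^32
// possible ids; id 0 is reserved as the null [`TreePtr`] (which is why the
// `tree_ptr.is_null()` slot above is skipped), and id 1 is the root
// directory's node.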
/// Clear the previously claimed slot in the tree for the given `ptr`. Note that this
/// should only be called after the corresponding node block has already been deallocated.
fn remove_tree<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<()> {
if ptr.is_null() {
// ID is invalid (should this return None?)
#[cfg(feature = "log")]
log::error!("READ_TREE: ID IS NULL");
return Err(Error::new(ENOENT));
}
let (i3, i2, i1, i0) = ptr.indexes();
let mut l3 = self.read_block(self.header.tree)?;
let mut l2 = self.read_block(l3.data().ptrs[i3])?;
let mut l1 = self.read_block(l2.data().ptrs[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
// Clear the value in the tree, but do not deallocate the block, as that should already
// have been done at the node level.
l0.data_mut().ptrs[i0] = BlockPtr::default();
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
self.header.tree = self.sync_block(l3)?;
self.header_changed = true;
Ok(())
}
pub fn sync_trees<T: Deref<Target = [u8]>>(&mut self, nodes: &[TreeData<T>]) -> Result<()> {
for node in nodes.iter().rev() {
let ptr = node.ptr();
if ptr.is_null() {
// ID is invalid
#[cfg(feature = "log")]
log::error!("SYNC_TREE: ID IS NULL");
return Err(Error::new(ENOENT));
}
}
for node in nodes.iter().rev() {
let (i3, i2, i1, i0) = node.ptr().indexes();
let mut l3 = self.read_block(self.header.tree)?;
let mut l2 = self.read_block(l3.data().ptrs[i3])?;
let mut l1 = self.read_block(l2.data().ptrs[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
let mut raw = self.read_block(l0.data().ptrs[i0])?;
// Skip this node if the data is unchanged
if raw.data().deref() == node.data().deref() {
continue;
}
//TODO: transmute instead of copy?
raw.data_mut().copy_from_slice(node.data());
// Write updates to newly allocated blocks
l0.data_mut().ptrs[i0] = self.sync_block(raw)?;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
self.header.tree = self.sync_block(l3)?;
self.header_changed = true;
}
Ok(())
}
pub fn sync_tree<T: Deref<Target = [u8]>>(&mut self, node: TreeData<T>) -> Result<()> {
self.sync_trees(&[node])
}
//
// MARK: node operations
//
// TODO: use more efficient methods for reading directories
/// Write all children of `parent_ptr` to `children`.
/// `parent_ptr` must point to a directory node.
pub fn child_nodes(
&mut self,
parent_ptr: TreePtr<Node>,
children: &mut Vec<DirEntry>,
) -> Result<()> {
let parent = self.read_tree(parent_ptr)?;
let record_level = parent.data().record_level();
for record_offset in 0..(parent.data().size() / record_level.bytes()) {
let block_ptr = self.node_record_ptr(&parent, record_offset)?;
// TODO: is this safe? what if child_nodes is called on
// a node that isn't a directory?
let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() };
let dir = self.read_block(dir_ptr)?;
for entry in dir.data().entries.iter() {
let node_ptr = entry.node_ptr();
// Skip empty entries
if node_ptr.is_null() {
continue;
}
children.push(*entry);
}
}
Ok(())
}
//TODO: improve performance (h-tree?)
/// Find a node that is a child of the `parent_ptr` and is named `name`.
/// Returns ENOENT if this node is not found.
pub fn find_node(&mut self, parent_ptr: TreePtr<Node>, name: &str) -> Result<TreeData<Node>> {
let parent = self.read_tree(parent_ptr)?;
let record_level = parent.data().record_level();
for block_offset in 0..(parent.data().size() / record_level.bytes()) {
let block_ptr = self.node_record_ptr(&parent, block_offset)?;
let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() };
let dir = self.read_block(dir_ptr)?;
for entry in dir.data().entries.iter() {
let node_ptr = entry.node_ptr();
// Skip empty entries
if node_ptr.is_null() {
continue;
}
// Return node pointer if name matches
if let Some(entry_name) = entry.name() {
if entry_name == name {
//TODO: Do not require read of node
return self.read_tree(node_ptr);
}
}
}
}
Err(Error::new(ENOENT))
}
// TODO: improve performance (h-tree?)
/// Create a new node in the tree with the given parameters.
pub fn create_node(
&mut self,
parent_ptr: TreePtr<Node>,
name: &str,
mode: u16,
ctime: u64,
ctime_nsec: u32,
) -> Result<TreeData<Node>> {
self.check_name(&parent_ptr, name)?;
unsafe {
let parent = self.read_tree(parent_ptr)?;
let node_block_data = BlockData::new(
self.allocate(BlockLevel::default())?,
Node::new(
mode,
parent.data().uid(),
parent.data().gid(),
ctime,
ctime_nsec,
),
);
let node_block_ptr = self.write_block(node_block_data)?;
let node_ptr = self.insert_tree(node_block_ptr)?;
self.link_node(parent_ptr, name, node_ptr)?;
//TODO: do not re-read node
self.read_tree(node_ptr)
}
}
pub fn link_node(
&mut self,
parent_ptr: TreePtr<Node>,
name: &str,
node_ptr: TreePtr<Node>,
) -> Result<()> {
self.check_name(&parent_ptr, name)?;
let mut parent = self.read_tree(parent_ptr)?;
let mut node = self.read_tree(node_ptr)?;
// Increment node reference counter
let links = node.data().links();
node.data_mut().set_links(links + 1);
let entry = DirEntry::new(node_ptr, name);
let record_level = parent.data().record_level();
let record_end = parent.data().size() / record_level.bytes();
for record_offset in 0..record_end {
let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() };
let mut dir = self.read_block(dir_ptr)?;
for old_entry in dir.data_mut().entries.iter_mut() {
if !old_entry.node_ptr().is_null() {
continue;
}
// Write our new entry into the first
// free slot in this directory
*old_entry = entry;
// Write updated blocks
dir_ptr = self.sync_block(dir)?;
dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?;
self.sync_trees(&[parent, node])?;
return Ok(());
}
}
// We couldn't find a free DirEntry slot; this directory is full.
// We now need to add a new dirlist block to the parent node,
// with `entry` as its first member.
let mut dir =
BlockData::<DirList>::empty(unsafe { self.allocate(BlockLevel::default())? }).unwrap();
dir.data_mut().entries[0] = entry;
let dir_ptr = unsafe { self.write_block(dir)? };
let dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_record_ptr(&mut parent, record_end, dir_record_ptr)?;
parent
.data_mut()
.set_size((record_end + 1) * record_level.bytes());
self.sync_trees(&[parent, node])?;
Ok(())
}
pub fn remove_node(&mut self, parent_ptr: TreePtr<Node>, name: &str, mode: u16) -> Result<()> {
let mut parent = self.read_tree(parent_ptr)?;
let record_level = parent.data().record_level();
let records = parent.data().size() / record_level.bytes();
for record_offset in 0..records {
let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() };
let mut dir = self.read_block(dir_ptr)?;
let mut node_opt = None;
for entry in dir.data_mut().entries.iter_mut() {
let node_ptr = entry.node_ptr();
// Skip empty entries
if node_ptr.is_null() {
continue;
}
// Check if name matches
if let Some(entry_name) = entry.name() {
if entry_name == name {
// Read node and test type against requested type
let (node, addr) = self.read_tree_and_addr(node_ptr)?;
if node.data().mode() & Node::MODE_TYPE == mode {
if node.data().is_dir()
&& node.data().size() > 0
&& node.data().links() == 1
{
// Tried to remove directory that still has entries
return Err(Error::new(ENOTEMPTY));
}
// Save node and clear entry
node_opt = Some((entry.node_ptr(), node, addr));
*entry = DirEntry::default();
break;
} else if node.data().is_dir() {
// Found directory instead of requested type
return Err(Error::new(EISDIR));
} else {
// Did not find directory when requested
return Err(Error::new(ENOTDIR));
}
}
}
}
if let Some((node_tree_ptr, mut node, addr)) = node_opt {
let links = node.data().links();
let remove_node = if links > 1 {
node.data_mut().set_links(links - 1);
false
} else {
node.data_mut().set_links(0);
self.truncate_node_inner(&mut node, 0)?;
true
};
if record_offset == records - 1 && dir.data().is_empty() {
let mut remove_record = record_offset;
loop {
// Remove empty parent record, if it is at the end
self.remove_node_record_ptr(&mut parent, remove_record)?;
parent
.data_mut()
.set_size(remove_record * record_level.bytes());
// Keep going for any other empty records
if remove_record > 0 {
remove_record -= 1;
dir_record_ptr = self.node_record_ptr(&parent, remove_record)?;
dir_ptr = unsafe { dir_record_ptr.cast() };
dir = self.read_block(dir_ptr)?;
if dir.data().is_empty() {
continue;
}
}
break;
}
} else {
// Save new parent record
dir_ptr = self.sync_block(dir)?;
dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?;
}
if remove_node {
self.sync_tree(parent)?;
self.remove_tree(node_tree_ptr)?;
unsafe {
self.deallocate(addr);
}
} else {
// Sync both parent and node at the same time
self.sync_trees(&[parent, node])?;
}
return Ok(());
}
}
Err(Error::new(ENOENT))
}
pub fn rename_node(
&mut self,
orig_parent_ptr: TreePtr<Node>,
orig_name: &str,
new_parent_ptr: TreePtr<Node>,
new_name: &str,
) -> Result<()> {
let orig = self.find_node(orig_parent_ptr, orig_name)?;
// TODO: only allow ENOENT as an error?
if let Ok(new) = self.find_node(new_parent_ptr, new_name) {
// Move to same name, return
if new.id() == orig.id() {
return Ok(());
}
// Remove new name
// (we renamed to a node that already exists, overwrite it.)
self.remove_node(
new_parent_ptr,
new_name,
new.data().mode() & Node::MODE_TYPE,
)?;
}
// Link original file to new name
self.link_node(new_parent_ptr, new_name, orig.ptr())?;
// Remove original file
self.remove_node(
orig_parent_ptr,
orig_name,
orig.data().mode() & Node::MODE_TYPE,
)?;
Ok(())
}
fn check_name(&mut self, parent_ptr: &TreePtr<Node>, name: &str) -> Result<()> {
if name.contains(':') {
return Err(Error::new(EINVAL));
}
if name.len() > DIR_ENTRY_MAX_LENGTH {
return Err(Error::new(EINVAL));
}
if self.find_node(parent_ptr.clone(), name).is_ok() {
return Err(Error::new(EEXIST));
}
Ok(())
}
/// Get a pointer to the record of `node` with the given offset
/// (i.e., to the `n`th record of `node`).
fn node_record_ptr(
&mut self,
node: &TreeData<Node>,
record_offset: u64,
) -> Result<BlockPtr<RecordRaw>> {
unsafe {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => Ok(node.data().level0[i0]),
NodeLevel::L1(i1, i0) => {
let l0 = self.read_block_or_empty(node.data().level1[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L2(i2, i1, i0) => {
let l1 = self.read_block_or_empty(node.data().level2[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L3(i3, i2, i1, i0) => {
let l2 = self.read_block_or_empty(node.data().level3[i3])?;
let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let l3 = self.read_block_or_empty(node.data().level4[i4])?;
let l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
}
}
}
fn remove_node_record_ptr(
&mut self,
node: &mut TreeData<Node>,
record_offset: u64,
) -> Result<()> {
unsafe {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => {
self.deallocate_block(node.data_mut().level0[i0].clear());
}
NodeLevel::L1(i1, i0) => {
let mut l0 = self.read_block_or_empty(node.data().level1[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(node.data_mut().level1[i1].clear());
} else {
node.data_mut().level1[i1] = self.sync_block(l0)?;
}
}
NodeLevel::L2(i2, i1, i0) => {
let mut l1 = self.read_block_or_empty(node.data().level2[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(node.data_mut().level2[i2].clear());
} else {
node.data_mut().level2[i2] = self.sync_block(l1)?;
}
}
NodeLevel::L3(i3, i2, i1, i0) => {
let mut l2 = self.read_block_or_empty(node.data().level3[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
}
if l2.data().is_empty() {
self.deallocate_block(node.data_mut().level3[i3].clear());
} else {
node.data_mut().level3[i3] = self.sync_block(l2)?;
}
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let mut l3 = self.read_block_or_empty(node.data().level4[i4])?;
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
}
if l2.data().is_empty() {
self.deallocate_block(l3.data_mut().ptrs[i3].clear());
} else {
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
}
if l3.data().is_empty() {
self.deallocate_block(node.data_mut().level4[i4].clear());
} else {
node.data_mut().level4[i4] = self.sync_block(l3)?;
}
}
}
Ok(())
}
}
/// Set the record at `ptr` as the data at `record_offset` of `node`.
fn sync_node_record_ptr(
&mut self,
node: &mut TreeData<Node>,
record_offset: u64,
ptr: BlockPtr<RecordRaw>,
) -> Result<()> {
unsafe {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => {
node.data_mut().level0[i0] = ptr;
}
NodeLevel::L1(i1, i0) => {
let mut l0 = self.read_block_or_empty(node.data().level1[i1])?;
l0.data_mut().ptrs[i0] = ptr;
node.data_mut().level1[i1] = self.sync_block(l0)?;
}
NodeLevel::L2(i2, i1, i0) => {
let mut l1 = self.read_block_or_empty(node.data().level2[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
node.data_mut().level2[i2] = self.sync_block(l1)?;
}
NodeLevel::L3(i3, i2, i1, i0) => {
let mut l2 = self.read_block_or_empty(node.data().level3[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
node.data_mut().level3[i3] = self.sync_block(l2)?;
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let mut l3 = self.read_block_or_empty(node.data().level4[i4])?;
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
node.data_mut().level4[i4] = self.sync_block(l3)?;
}
}
}
Ok(())
}
pub fn read_node_inner(
&mut self,
node: &TreeData<Node>,
mut offset: u64,
buf: &mut [u8],
) -> Result<usize> {
let node_size = node.data().size();
let record_level = node.data().record_level();
let mut bytes_read = 0;
while bytes_read < buf.len() && offset < node_size {
// Offset into the current record at which to start reading
let j = (offset % record_level.bytes()) as usize;
// Number of bytes to read in this iteration
let len = min(
buf.len() - bytes_read, // number of bytes we have left in `buf`
min(
record_level.bytes() - j as u64, // number of bytes we haven't read in this record
node_size - offset, // number of bytes left in this node
) as usize,
);
let record_idx = offset / record_level.bytes();
let record_ptr = self.node_record_ptr(node, record_idx)?;
// The level of the record to read.
// This is at most `record_level` due to the way `len` is computed.
let level = BlockLevel::for_bytes((j + len) as u64);
let record = unsafe { self.read_record(record_ptr, level)? };
buf[bytes_read..bytes_read + len].copy_from_slice(&record.data()[j..j + len]);
bytes_read += len;
offset += len as u64;
}
Ok(bytes_read)
}
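// Worked example, assuming 4 KiB records: reading 10 bytes at offset 4090
// of a sufficiently large node takes two iterations; the first has
// j = 4090 and len = 6 (the tail of record 0), the second has j = 0 and
// len = 4 (the head of record 1).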
pub fn read_node(
&mut self,
node_ptr: TreePtr<Node>,
offset: u64,
buf: &mut [u8],
atime: u64,
atime_nsec: u32,
) -> Result<usize> {
let mut node = self.read_tree(node_ptr)?;
let mut node_changed = false;
let i = self.read_node_inner(&node, offset, buf)?;
if i > 0 {
let node_atime = node.data().atime();
if atime > node_atime.0 || (atime == node_atime.0 && atime_nsec > node_atime.1) {
let is_old = atime - node_atime.0 > 3600; // Last access was more than an hour ago
if is_old {
node.data_mut().set_atime(atime, atime_nsec);
node_changed = true;
}
}
}
if node_changed {
self.sync_tree(node)?;
}
Ok(i)
}
pub fn truncate_node_inner(&mut self, node: &mut TreeData<Node>, size: u64) -> Result<bool> {
let old_size = node.data().size();
let record_level = node.data().record_level();
// Size already matches, return
if old_size == size {
return Ok(false);
}
if old_size < size {
// If we're "truncating" to a larger size,
// write zeroes until the size matches
let zeroes = RecordRaw::empty(record_level).unwrap();
let mut offset = old_size;
while offset < size {
let start = offset % record_level.bytes();
if start == 0 {
// We don't have to write completely zero records as read will interpret
// null record pointers as zero records
offset = size;
break;
}
let end = if offset / record_level.bytes() == size / record_level.bytes() {
size % record_level.bytes()
} else {
record_level.bytes()
};
self.write_node_inner(node, &mut offset, &zeroes[start as usize..end as usize])?;
}
assert_eq!(offset, size);
} else {
// Deallocate records
for record in ((size + record_level.bytes() - 1) / record_level.bytes()
..old_size / record_level.bytes())
.rev()
{
self.remove_node_record_ptr(node, record)?;
}
}
// Update size
node.data_mut().set_size(size);
Ok(true)
}
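// Sparse-file example: growing a fresh node from 0 to 256 MiB writes no
// record data at all. Whole-record holes are left as null pointers, and
// `read_record` materializes them as zero-filled blocks on read, which is
// what the `start == 0` early exit above relies on.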
/// Truncate the given node to the given size.
///
/// If `size` is larger than the node's current size,
/// expand the node with zeroes.
pub fn truncate_node(
&mut self,
node_ptr: TreePtr<Node>,
size: u64,
mtime: u64,
mtime_nsec: u32,
) -> Result<()> {
let mut node = self.read_tree(node_ptr)?;
if self.truncate_node_inner(&mut node, size)? {
let node_mtime = node.data().mtime();
if mtime > node_mtime.0 || (mtime == node_mtime.0 && mtime_nsec > node_mtime.1) {
node.data_mut().set_mtime(mtime, mtime_nsec);
}
self.sync_tree(node)?;
}
Ok(())
}
pub fn write_node_inner(
&mut self,
node: &mut TreeData<Node>,
offset: &mut u64,
buf: &[u8],
) -> Result<bool> {
let mut node_changed = false;
let record_level = node.data().record_level();
let node_records = (node.data().size() + record_level.bytes() - 1) / record_level.bytes();
let mut i = 0;
while i < buf.len() {
let j = (*offset % record_level.bytes()) as usize;
let len = min(buf.len() - i, record_level.bytes() as usize - j);
let level = BlockLevel::for_bytes((j + len) as u64);
let mut record_ptr = if node_records > (*offset / record_level.bytes()) {
self.node_record_ptr(node, *offset / record_level.bytes())?
} else {
BlockPtr::null(level)
};
let mut record = unsafe { self.read_record(record_ptr, level)? };
if buf[i..i + len] != record.data()[j..j + len] {
unsafe {
// CoW record using its current level
let mut old_addr = record.swap_addr(self.allocate(record.addr().level())?);
// If the record was resized we need to dealloc the original ptr
if old_addr.is_null() {
old_addr = record_ptr.addr();
}
record.data_mut()[j..j + len].copy_from_slice(&buf[i..i + len]);
record_ptr = self.write_block(record)?;
if !old_addr.is_null() {
self.deallocate(old_addr);
}
}
self.sync_node_record_ptr(node, *offset / record_level.bytes(), record_ptr)?;
node_changed = true;
}
i += len;
*offset += len as u64;
}
if node.data().size() < *offset {
node.data_mut().set_size(*offset);
node_changed = true;
}
Ok(node_changed)
}
/// Write the bytes at `buf` to `node` starting at `offset`.
pub fn write_node(
&mut self,
node_ptr: TreePtr<Node>,
mut offset: u64,
buf: &[u8],
mtime: u64,
mtime_nsec: u32,
) -> Result<usize> {
let mut node = self.read_tree(node_ptr)?;
if self.write_node_inner(&mut node, &mut offset, buf)? {
let node_mtime = node.data().mtime();
if mtime > node_mtime.0 || (mtime == node_mtime.0 && mtime_nsec > node_mtime.1) {
node.data_mut().set_mtime(mtime, mtime_nsec);
}
self.sync_tree(node)?;
}
Ok(buf.len())
}
}
use core::{marker::PhantomData, mem, ops, slice};
use endian_num::Le;
use crate::{BlockLevel, BlockPtr, BlockRaw, BlockTrait};
// 1 << 8 = 256, this is the number of entries in a TreeList
const TREE_LIST_SHIFT: u32 = 8;
const TREE_LIST_ENTRIES: usize = 1 << TREE_LIST_SHIFT;
/// A tree with 4 levels
pub type Tree = TreeList<TreeList<TreeList<TreeList<BlockRaw>>>>;
/// A [`TreePtr`] and the contents of the block it references.
#[derive(Clone, Copy, Debug, Default)]
pub struct TreeData<T> {
/// The value of the [`TreePtr`]
id: u32,
/// The data
data: T,
}
impl<T> TreeData<T> {
pub fn new(id: u32, data: T) -> Self {
Self { id, data }
}
pub fn id(&self) -> u32 {
self.id
}
pub fn data(&self) -> &T {
&self.data
}
pub fn data_mut(&mut self) -> &mut T {
&mut self.data
}
pub fn into_data(self) -> T {
self.data
}
pub fn ptr(&self) -> TreePtr<T> {
TreePtr {
id: self.id.into(),
phantom: PhantomData,
}
}
}
/// A list of pointers to blocks of type `T`.
/// This is one level of a [`Tree`], defined above.
#[repr(C, packed)]
pub struct TreeList<T> {
pub ptrs: [BlockPtr<T>; TREE_LIST_ENTRIES],
}
unsafe impl<T> BlockTrait for TreeList<T> {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 == 0 {
Some(Self {
ptrs: [BlockPtr::default(); TREE_LIST_ENTRIES],
})
} else {
None
}
}
}
impl<T> ops::Deref for TreeList<T> {
type Target = [u8];
fn deref(&self) -> &[u8] {
unsafe {
slice::from_raw_parts(
self as *const TreeList<T> as *const u8,
mem::size_of::<TreeList<T>>(),
) as &[u8]
}
}
}
impl<T> ops::DerefMut for TreeList<T> {
fn deref_mut(&mut self) -> &mut [u8] {
unsafe {
slice::from_raw_parts_mut(
self as *mut TreeList<T> as *mut u8,
mem::size_of::<TreeList<T>>(),
) as &mut [u8]
}
}
}
/// A pointer to an entry in a [`Tree`].
#[repr(C, packed)]
pub struct TreePtr<T> {
id: Le<u32>,
phantom: PhantomData<T>,
}
impl<T> TreePtr<T> {
/// Get a [`TreePtr`] to the filesystem root
/// directory's node.
pub fn root() -> Self {
Self::new(1)
}
pub fn new(id: u32) -> Self {
Self {
id: id.into(),
phantom: PhantomData,
}
}
/// Create a [`TreePtr`] from [`Tree`] indices,
/// where `indexes` is `(i3, i2, i1, i0)`:
/// - `i3` is the index into the level 3 table,
/// - `i2` is the index into the level 2 table at `i3`
/// - ...and so on.
pub fn from_indexes(indexes: (usize, usize, usize, usize)) -> Self {
const SHIFT: u32 = TREE_LIST_SHIFT;
let id = ((indexes.0 << (3 * SHIFT)) as u32)
| ((indexes.1 << (2 * SHIFT)) as u32)
| ((indexes.2 << SHIFT) as u32)
| (indexes.3 as u32);
Self {
id: id.into(),
phantom: PhantomData,
}
}
pub fn id(&self) -> u32 {
self.id.to_ne()
}
pub fn is_null(&self) -> bool {
self.id() == 0
}
/// Get the indices of this [`TreePtr`] in a [`Tree`].
/// Returns `(i3, i2, i1, i0)`:
/// - `i3` is the index into the level 3 table,
/// - `i2` is the index into the level 2 table at `i3`
/// - ...and so on.
pub fn indexes(&self) -> (usize, usize, usize, usize) {
const SHIFT: u32 = TREE_LIST_SHIFT;
const NUM: u32 = 1 << SHIFT;
const MASK: u32 = NUM - 1;
let id = self.id();
let i3 = ((id >> (3 * SHIFT)) & MASK) as usize;
let i2 = ((id >> (2 * SHIFT)) & MASK) as usize;
let i1 = ((id >> SHIFT) & MASK) as usize;
let i0 = (id & MASK) as usize;
(i3, i2, i1, i0)
}
}
impl<T> Clone for TreePtr<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Copy for TreePtr<T> {}
impl<T> Default for TreePtr<T> {
fn default() -> Self {
Self {
id: 0.into(),
phantom: PhantomData,
}
}
}
#[test]
fn tree_list_size_test() {
assert_eq!(
mem::size_of::<TreeList<BlockRaw>>(),
crate::BLOCK_SIZE as usize
);
}
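// A small round-trip check for the id packing used by `from_indexes` and
// `indexes` above; it only relies on items defined in this module.
#[test]
fn tree_ptr_indexes_round_trip() {
// 0x0102_0304 packs the indices (1, 2, 3, 4), one byte per level
let ptr = TreePtr::<BlockRaw>::new(0x0102_0304);
let indexes = ptr.indexes();
assert_eq!(indexes, (1, 2, 3, 4));
assert_eq!(TreePtr::<BlockRaw>::from_indexes(indexes).id(), ptr.id());
}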
use std::{
fs,
io,
process::{Command, ExitStatus},
};
fn unmount_linux_path(mount_path: &str) -> io::Result<ExitStatus> {
// Different distributions ship different fusermount binaries; try
// them all.
let commands = ["fusermount", "fusermount3"];
for command in commands {
match Command::new(command).arg("-u").arg(mount_path).status() {
// Try the next binary only if this one was not found
Err(e) if e.kind() == io::ErrorKind::NotFound => continue,
// Propagate success and any other error
status => return status,
}
}
// Unmounting failed since no suitable command was found
Err(std::io::Error::new(
io::ErrorKind::NotFound,
format!(
"Unable to locate any fusermount binaries. Tried {:?}. Is fuse installed?",
commands
),
))
}
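/// Unmount the filesystem mounted at `mount_path`, using a fusermount
/// binary on Linux, `umount` on other Unix systems, and scheme removal
/// on Redox.
///
/// A minimal usage sketch (assuming the crate re-exports this function as
/// `redoxfs::unmount_path` and that a filesystem is mounted at the
/// hypothetical mountpoint `image`):
///
/// ```no_run
/// redoxfs::unmount_path("image").expect("failed to unmount");
/// ```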
pub fn unmount_path(mount_path: &str) -> Result<(), io::Error> {
if cfg!(target_os = "redox") {
fs::remove_file(format!(":{}", mount_path))?
} else {
let status_res = if cfg!(target_os = "linux") {
unmount_linux_path(mount_path)
} else {
Command::new("umount").arg(mount_path).status()
};
let status = status_res?;
if !status.success() {
return Err(io::Error::new(
io::ErrorKind::Other,
"redoxfs umount failed",
));
}
}
Ok(())
}
#!/usr/bin/env bash
CARGO_ARGS=(--release)
TARGET=target/release
export RUST_BACKTRACE=full
export RUST_LOG=info
function cleanup {
sync
fusermount -u image || true
fusermount3 -u image || true
}
trap 'cleanup' ERR
set -eEx
cleanup
redoxer test -- --lib -- --nocapture
cargo test --lib --no-default-features -- --nocapture
cargo test --lib -- --nocapture
cargo build "${CARGO_ARGS[@]}"
rm -f image.bin
fallocate -l 1G image.bin
time "${TARGET}/redoxfs-mkfs" image.bin
mkdir -p image
"${TARGET}/redoxfs" image.bin image
df -h image
ls -lah image
mkdir image/test
time cp -r src image/test/src
dd if=/dev/urandom of=image/test/random bs=1M count=256
dd if=image/test/random of=/dev/null bs=1M count=256
time truncate --size=256M image/test/sparse
dd if=image/test/sparse of=/dev/null bs=1M count=256
dd if=/dev/zero of=image/test/zero bs=1M count=256
dd if=image/test/zero of=/dev/null bs=1M count=256
ls -lah image/test
df -h image
rm image/test/random
rm image/test/sparse
rm image/test/zero
rm -rf image/test/src
rmdir image/test
df -h image
ls -lah image
cleanup
"${TARGET}/redoxfs" image.bin image
df -h image
ls -lah image
cleanup