#[cfg(not(target_os = "redox"))]
#[cfg(all(not(target_os = "redox"), not(fuzzing)))]
mod fuse;
#[cfg(all(not(target_os = "redox"), fuzzing))]
pub mod fuse;
#[cfg(not(target_os = "redox"))]
pub use self::fuse::mount;
......
use std::fs::File;
use std::io::{self, Read, Write};
use redox_scheme::{RequestKind, SignalBehavior, Socket, V2};
use std::io;
use std::path::Path;
use std::sync::atomic::Ordering;
use syscall::{Packet, SchemeMut};
use crate::{Disk, FileSystem, IS_UMT};
use crate::{Disk, FileSystem, Transaction, IS_UMT};
use self::scheme::FileScheme;
@@ -15,41 +14,37 @@ pub fn mount<D, P, T, F>(filesystem: FileSystem<D>, mountpoint: P, mut callback:
where
D: Disk,
P: AsRef<Path>,
F: FnMut(&Path) -> T,
F: FnOnce(&Path) -> T,
{
let mountpoint = mountpoint.as_ref();
let socket_path = format!(":{}", mountpoint.display());
let mut socket = File::create(&socket_path)?;
let socket = Socket::<V2>::create(&format!("{}", mountpoint.display()))?;
let mounted_path = format!("{}:", mountpoint.display());
let res = callback(Path::new(&mounted_path));
let mut scheme = FileScheme::new(format!("{}", mountpoint.display()), filesystem);
loop {
if IS_UMT.load(Ordering::SeqCst) > 0 {
break Ok(res);
}
let mut packet = Packet::default();
match socket.read(&mut packet) {
Ok(0) => break Ok(res),
Ok(_ok) => (),
Err(err) => {
if err.kind() == io::ErrorKind::Interrupted {
continue;
while IS_UMT.load(Ordering::SeqCst) == 0 {
let req = match socket.next_request(SignalBehavior::Restart)? {
None => break,
Some(req) => {
if let RequestKind::Call(r) = req.kind() {
r
} else {
break Err(err);
// TODO: Redoxfs does not yet support asynchronous file IO. It might still make
// sense to implement cancellation for huge buffers, e.g. dd bs=1G
continue;
}
}
}
};
let response = req.handle_scheme_mut(&mut scheme);
scheme.handle(&mut packet);
match socket.write(&packet) {
Ok(_ok) => (),
Err(err) => {
break Err(err);
}
if !socket.write_response(response, SignalBehavior::Restart)? {
break;
}
}
// Squash allocations and sync on unmount
let _ = Transaction::new(&mut scheme.fs).commit(true);
Ok(res)
}
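The rewritten loop replaces raw `Packet` reads and writes with `redox_scheme`'s typed request API, and the final `Transaction::commit(true)` squashes the allocator log on unmount. For context, a minimal sketch of how a daemon might drive this `mount` API; `DiskFile` and the exact `FileSystem::open` arguments are assumptions for illustration, not taken from this diff:

```rust
// Hedged sketch: open a disk image and serve it until unmounted.
// DiskFile and the FileSystem::open signature are assumed here.
use redoxfs::{mount, DiskFile, FileSystem};

fn main() -> std::io::Result<()> {
    let disk = DiskFile::open("redoxfs.img").expect("failed to open image");
    let fs = FileSystem::open(disk, None, None, false).expect("failed to open filesystem");
    // The callback runs once the scheme socket exists; mount() then blocks
    // in the request loop until IS_UMT is set or the socket closes.
    mount(fs, "example", |real_path| {
        println!("redoxfs serving at {}", real_path.display());
    })?;
    Ok(())
}
```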
use std::cmp::{max, min};
use std::slice;
use std::time::{SystemTime, UNIX_EPOCH};
use alloc::collections::BTreeMap;
use libredox::call::MmapArgs;
use range_tree::RangeTree;
use syscall::{MAP_PRIVATE, PAGE_SIZE, EBADFD};
use syscall::data::{Map, Stat, TimeSpec};
use syscall::error::{Error, Result, EBADF, EINVAL, EISDIR, ENOMEM, EPERM};
use syscall::data::{Stat, TimeSpec};
use syscall::error::{Error, Result, EBADF, EINVAL, EISDIR, EPERM};
use syscall::flag::{
MapFlags, F_GETFL, F_SETFL, MODE_PERM, O_ACCMODE, O_APPEND, O_RDONLY, O_RDWR, O_WRONLY,
PROT_READ, PROT_WRITE, SEEK_CUR, SEEK_END, SEEK_SET,
PROT_READ, PROT_WRITE,
};
use syscall::{EBADFD, PAGE_SIZE};
use crate::{Disk, Node, Transaction, TreePtr};
@@ -28,15 +28,28 @@ pub trait Resource<D: Disk> {
fn set_path(&mut self, path: &str);
fn read(&mut self, buf: &mut [u8], tx: &mut Transaction<D>) -> Result<usize>;
fn read(&mut self, buf: &mut [u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize>;
fn write(&mut self, buf: &[u8], tx: &mut Transaction<D>) -> Result<usize>;
fn write(&mut self, buf: &[u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize>;
fn seek(&mut self, offset: isize, whence: usize, tx: &mut Transaction<D>) -> Result<isize>;
fn fsize(&mut self, tx: &mut Transaction<D>) -> Result<u64>;
fn fmap(&mut self, fmaps: &mut Fmaps, flags: MapFlags, size: usize, offset: u64, tx: &mut Transaction<D>) -> Result<usize>;
fn fmap(
&mut self,
fmaps: &mut Fmaps,
flags: MapFlags,
size: usize,
offset: u64,
tx: &mut Transaction<D>,
) -> Result<usize>;
fn funmap(&mut self, fmaps: &mut Fmaps, offset: u64, size: usize, tx: &mut Transaction<D>) -> Result<usize>;
fn funmap(
&mut self,
fmaps: &mut Fmaps,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<usize>;
fn fchmod(&mut self, mode: u16, tx: &mut Transaction<D>) -> Result<usize> {
let mut node = tx.read_tree(self.node_ptr())?;
@@ -129,7 +142,6 @@ pub struct DirResource {
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
data: Option<Vec<u8>>,
seek: isize,
uid: u32,
}
@@ -146,7 +158,6 @@ impl DirResource {
parent_ptr_opt,
node_ptr,
data,
seek: 0,
uid,
}
}
@@ -171,7 +182,6 @@ impl<D: Disk> Resource<D> for DirResource {
parent_ptr_opt: self.parent_ptr_opt,
node_ptr: self.node_ptr,
data: self.data.clone(),
seek: self.seek,
uid: self.uid,
}))
}
@@ -180,38 +190,43 @@ impl<D: Disk> Resource<D> for DirResource {
self.path = path.to_string();
}
fn read(&mut self, buf: &mut [u8], _tx: &mut Transaction<D>) -> Result<usize> {
fn read(&mut self, buf: &mut [u8], offset: u64, _tx: &mut Transaction<D>) -> Result<usize> {
let data = self.data.as_ref().ok_or(Error::new(EISDIR))?;
let size = data.len() as isize;
let mut i = 0;
while i < buf.len() && self.seek < size {
buf[i] = data[self.seek as usize];
i += 1;
self.seek += 1;
}
Ok(i)
let src = usize::try_from(offset)
.ok()
.and_then(|o| data.get(o..))
.unwrap_or(&[]);
let byte_count = core::cmp::min(src.len(), buf.len());
buf[..byte_count].copy_from_slice(&src[..byte_count]);
Ok(byte_count)
}
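The rewritten `read` is stateless: the caller supplies the offset, an out-of-range offset degrades to an empty source slice, and the copy length is clamped to the shorter of source and destination. The same pattern in isolation, with its edge cases checked (`read_at` is a hypothetical helper, not part of this diff):

```rust
/// Sketch of the clamped offset-read above: copy from `data` at `offset`
/// into `buf`, returning the byte count; reads past the end return 0.
fn read_at(data: &[u8], offset: u64, buf: &mut [u8]) -> usize {
    let src = usize::try_from(offset)
        .ok()
        .and_then(|o| data.get(o..))
        .unwrap_or(&[]);
    let n = src.len().min(buf.len());
    buf[..n].copy_from_slice(&src[..n]);
    n
}

#[test]
fn read_at_clamps() {
    let mut buf = [0u8; 4];
    assert_eq!(read_at(b"hello", 0, &mut buf), 4); // limited by the buffer
    assert_eq!(read_at(b"hello", 4, &mut buf), 1); // limited by the data
    assert_eq!(read_at(b"hello", 9, &mut buf), 0); // past the end
}
```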
fn write(&mut self, _buf: &[u8], _tx: &mut Transaction<D>) -> Result<usize> {
fn write(&mut self, _buf: &[u8], _offset: u64, _tx: &mut Transaction<D>) -> Result<usize> {
Err(Error::new(EBADF))
}
fn seek(&mut self, offset: isize, whence: usize, _tx: &mut Transaction<D>) -> Result<isize> {
let data = self.data.as_ref().ok_or(Error::new(EBADF))?;
let size = data.len() as isize;
self.seek = match whence {
SEEK_SET => max(0, min(size, offset)),
SEEK_CUR => max(0, min(size, self.seek + offset)),
SEEK_END => max(0, min(size, size + offset)),
_ => return Err(Error::new(EINVAL)),
};
Ok(self.seek)
fn fsize(&mut self, _tx: &mut Transaction<D>) -> Result<u64> {
Ok(self.data.as_ref().ok_or(Error::new(EBADF))?.len() as u64)
}
fn fmap(&mut self, _fmaps: &mut Fmaps, _flags: MapFlags, _size: usize, _offset: u64, _tx: &mut Transaction<D>) -> Result<usize> {
fn fmap(
&mut self,
_fmaps: &mut Fmaps,
_flags: MapFlags,
_size: usize,
_offset: u64,
_tx: &mut Transaction<D>,
) -> Result<usize> {
Err(Error::new(EBADF))
}
fn funmap(&mut self, _fmaps: &mut Fmaps, _offset: u64, _size: usize, _tx: &mut Transaction<D>) -> Result<usize> {
fn funmap(
&mut self,
_fmaps: &mut Fmaps,
_offset: u64,
_size: usize,
_tx: &mut Transaction<D>,
) -> Result<usize> {
Err(Error::new(EBADF))
}
@@ -263,16 +278,11 @@ impl Fmap {
let buf = slice::from_raw_parts_mut(address, unaligned_size);
let count = match tx.read_node(
node_ptr,
offset,
buf,
atime.as_secs(),
atime.subsec_nanos(),
) {
let count = match tx.read_node(node_ptr, offset, buf, atime.as_secs(), atime.subsec_nanos())
{
Ok(ok) => ok,
Err(err) => {
let _ = syscall::funmap(address as usize, aligned_size);
let _ = libredox::call::munmap(address.cast(), aligned_size);
return Err(err);
}
};
@@ -287,7 +297,14 @@ impl Fmap {
})
}
pub unsafe fn sync<D: Disk>(&mut self, node_ptr: TreePtr<Node>, base: *mut u8, offset: u64, size: usize, tx: &mut Transaction<D>) -> Result<()> {
pub unsafe fn sync<D: Disk>(
&mut self,
node_ptr: TreePtr<Node>,
base: *mut u8,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<()> {
if self.flags & PROT_WRITE == PROT_WRITE {
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.write_node(
@@ -307,11 +324,12 @@ pub struct FileResource {
parent_ptr_opt: Option<TreePtr<Node>>,
node_ptr: TreePtr<Node>,
flags: usize,
seek: isize,
uid: u32,
}
#[derive(Debug)]
pub struct FileMmapInfo {
base: *mut u8,
size: usize,
ranges: RangeTree<Fmap>,
pub open_fds: usize,
}
@@ -319,6 +337,7 @@ impl Default for FileMmapInfo {
fn default() -> Self {
Self {
base: core::ptr::null_mut(),
size: 0,
ranges: RangeTree::new(),
open_fds: 0,
}
@@ -338,7 +357,6 @@ impl FileResource {
parent_ptr_opt,
node_ptr,
flags,
seek: 0,
uid,
}
}
@@ -363,7 +381,6 @@ impl<D: Disk> Resource<D> for FileResource {
parent_ptr_opt: self.parent_ptr_opt,
node_ptr: self.node_ptr,
flags: self.flags,
seek: self.seek,
uid: self.uid,
}))
}
@@ -372,58 +389,53 @@ impl<D: Disk> Resource<D> for FileResource {
self.path = path.to_string();
}
fn read(&mut self, buf: &mut [u8], tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE == O_RDWR || self.flags & O_ACCMODE == O_RDONLY {
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let count = tx.read_node(
self.node_ptr,
self.seek as u64,
buf,
atime.as_secs(),
atime.subsec_nanos(),
)?;
self.seek += count as isize;
Ok(count)
} else {
Err(Error::new(EBADF))
fn read(&mut self, buf: &mut [u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE != O_RDWR && self.flags & O_ACCMODE != O_RDONLY {
return Err(Error::new(EBADF));
}
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.read_node(
self.node_ptr,
offset,
buf,
atime.as_secs(),
atime.subsec_nanos(),
)
}
fn write(&mut self, buf: &[u8], tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE == O_RDWR || self.flags & O_ACCMODE == O_WRONLY {
if self.flags & O_APPEND == O_APPEND {
let node = tx.read_tree(self.node_ptr)?;
self.seek = node.data().size() as isize;
}
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
let count = tx.write_node(
self.node_ptr,
self.seek as u64,
buf,
mtime.as_secs(),
mtime.subsec_nanos(),
)?;
self.seek += count as isize;
Ok(count)
} else {
Err(Error::new(EBADF))
fn write(&mut self, buf: &[u8], offset: u64, tx: &mut Transaction<D>) -> Result<usize> {
if self.flags & O_ACCMODE != O_RDWR && self.flags & O_ACCMODE != O_WRONLY {
return Err(Error::new(EBADF));
}
let effective_offset = if self.flags & O_APPEND == O_APPEND {
let node = tx.read_tree(self.node_ptr)?;
node.data().size()
} else {
offset
};
let mtime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
tx.write_node(
self.node_ptr,
effective_offset,
buf,
mtime.as_secs(),
mtime.subsec_nanos(),
)
}
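With per-file seek state gone, `O_APPEND` is resolved at each write: the caller's offset is discarded and the node's current size is used instead, matching POSIX append semantics. The offset rule on its own (`effective_offset` is a hypothetical reduction of the logic above):

```rust
// Sketch of the append rule; `file_size` stands in for the node size
// read from the tree inside the transaction.
fn effective_offset(append: bool, requested: u64, file_size: u64) -> u64 {
    if append { file_size } else { requested }
}

#[test]
fn append_ignores_requested_offset() {
    assert_eq!(effective_offset(true, 0, 1024), 1024); // O_APPEND writes at EOF
    assert_eq!(effective_offset(false, 512, 1024), 512); // plain positional write
}
```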
fn seek(&mut self, offset: isize, whence: usize, tx: &mut Transaction<D>) -> Result<isize> {
self.seek = match whence {
SEEK_SET => max(0, offset),
SEEK_CUR => max(0, self.seek + offset),
SEEK_END => {
let node = tx.read_tree(self.node_ptr)?;
max(0, node.data().size() as isize + offset)
}
_ => return Err(Error::new(EINVAL)),
};
Ok(self.seek)
fn fsize(&mut self, tx: &mut Transaction<D>) -> Result<u64> {
let node = tx.read_tree(self.node_ptr)?;
Ok(node.data().size())
}
fn fmap(&mut self, fmaps: &mut Fmaps, flags: MapFlags, unaligned_size: usize, offset: u64, tx: &mut Transaction<D>) -> Result<usize> {
fn fmap(
&mut self,
fmaps: &mut Fmaps,
flags: MapFlags,
unaligned_size: usize,
offset: u64,
tx: &mut Transaction<D>,
) -> Result<usize> {
//dbg!(&self.fmaps);
let accmode = self.flags & O_ACCMODE;
if flags.contains(PROT_READ) && !(accmode == O_RDWR || accmode == O_RDONLY) {
@@ -441,33 +453,44 @@ impl<D: Disk> Resource<D> for FileResource {
// TODO: Pass entry directory to Resource trait functions, since the node_ptr can be
// obtained by the caller.
let fmap_info = fmaps.get_mut(&self.node_ptr.id()).ok_or(Error::new(EBADFD))?;
let max_offset = fmap_info.ranges.end();
if offset + aligned_size as u64 > max_offset {
if fmap_info.base.is_null() {
fmap_info.base = unsafe {
syscall::fmap(!0, &Map {
size: offset as usize + aligned_size,
let fmap_info = fmaps
.get_mut(&self.node_ptr.id())
.ok_or(Error::new(EBADFD))?;
let new_size = (offset as usize + aligned_size).next_multiple_of(PAGE_SIZE);
if new_size > fmap_info.size {
fmap_info.base = if fmap_info.base.is_null() {
unsafe {
libredox::call::mmap(MmapArgs {
length: new_size,
// PRIVATE/SHARED doesn't matter once the pages are passed in the fmap
// handler.
flags: MapFlags::PROT_READ | MapFlags::PROT_WRITE | MapFlags::MAP_PRIVATE,
prot: libredox::flag::PROT_READ | libredox::flag::PROT_WRITE,
flags: libredox::flag::MAP_PRIVATE,
offset: 0,
address: 0,
fd: !0,
addr: core::ptr::null_mut(),
})? as *mut u8
};
}
} else {
let new_size = (offset as usize + aligned_size).next_multiple_of(PAGE_SIZE);
let old_size = max_offset as usize;
fmap_info.base = unsafe {
syscall::syscall5(syscall::SYS_MREMAP, fmap_info.base as usize, old_size, 0, new_size, syscall::MremapFlags::empty().bits() | (PROT_READ | PROT_WRITE).bits())? as *mut u8
};
}
unsafe {
syscall::syscall5(
syscall::SYS_MREMAP,
fmap_info.base as usize,
fmap_info.size,
0,
new_size,
syscall::MremapFlags::empty().bits() | (PROT_READ | PROT_WRITE).bits(),
)? as *mut u8
}
};
fmap_info.size = new_size;
}
let affected_fmaps = fmap_info.ranges.remove_and_unused(offset..offset + aligned_size as u64);
let affected_fmaps = fmap_info
.ranges
.remove_and_unused(offset..offset + aligned_size as u64);
for (range, v_opt) in affected_fmaps {
//dbg!(&range);
@@ -475,9 +498,20 @@ impl<D: Disk> Resource<D> for FileResource {
fmap.rc += 1;
fmap.flags |= flags;
fmap_info.ranges.insert(range.start, range.end - range.start, fmap);
fmap_info
.ranges
.insert(range.start, range.end - range.start, fmap);
} else {
let map = unsafe { Fmap::new(self.node_ptr, flags, unaligned_size, offset, fmap_info.base, tx)? };
let map = unsafe {
Fmap::new(
self.node_ptr,
flags,
unaligned_size,
offset,
fmap_info.base,
tx,
)?
};
fmap_info.ranges.insert(offset, aligned_size as u64, map);
}
}
@@ -486,11 +520,20 @@ impl<D: Disk> Resource<D> for FileResource {
Ok(fmap_info.base as usize + offset as usize)
}
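The backing map grows lazily: the required end (`offset + aligned_size`) is rounded up to a whole page, and the code only calls mmap, or mremap for an existing map, when that exceeds the tracked `size`. The rounding in isolation (`PAGE_SIZE` assumed to be 4096 for the check):

```rust
// Sketch of the size computation used by fmap above.
const PAGE_SIZE: usize = 4096; // assumed page size

fn required_map_size(offset: u64, size: usize) -> usize {
    (offset as usize + size).next_multiple_of(PAGE_SIZE)
}

#[test]
fn map_size_is_page_rounded() {
    assert_eq!(required_map_size(0, 1), PAGE_SIZE); // one byte still costs a page
    assert_eq!(required_map_size(4096, 4096), 2 * PAGE_SIZE); // exact multiple kept
    assert_eq!(required_map_size(4096, 4097), 3 * PAGE_SIZE); // spill rounds up
}
```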
fn funmap(&mut self, fmaps: &mut Fmaps, offset: u64, size: usize, tx: &mut Transaction<D>) -> Result<usize> {
let fmap_info = fmaps.get_mut(&self.node_ptr.id()).ok_or(Error::new(EBADFD))?;
fn funmap(
&mut self,
fmaps: &mut Fmaps,
offset: u64,
size: usize,
tx: &mut Transaction<D>,
) -> Result<usize> {
let fmap_info = fmaps
.get_mut(&self.node_ptr.id())
.ok_or(Error::new(EBADFD))?;
//dbg!(&self.fmaps);
//dbg!(self.fmaps.conflicts(offset..offset + size as u64).collect::<Vec<_>>());
#[allow(unused_mut)]
let mut affected_fmaps = fmap_info.ranges.remove(offset..offset + size as u64);
for (range, mut fmap) in affected_fmaps {
@@ -498,11 +541,19 @@ impl<D: Disk> Resource<D> for FileResource {
//log::info!("SYNCING {}..{}", range.start, range.end);
unsafe {
fmap.sync(self.node_ptr, fmap_info.base, range.start, (range.end - range.start) as usize, tx)?;
fmap.sync(
self.node_ptr,
fmap_info.base,
range.start,
(range.end - range.start) as usize,
tx,
)?;
}
if fmap.rc > 0 {
fmap_info.ranges.insert(range.start, range.end - range.start, fmap);
fmap_info
.ranges
.insert(range.start, range.end - range.start, fmap);
}
}
//dbg!(&self.fmaps);
@@ -529,7 +580,13 @@ impl<D: Disk> Resource<D> for FileResource {
if let Some(fmap_info) = fmaps.get_mut(&self.node_ptr.id()) {
for (range, fmap) in fmap_info.ranges.iter_mut() {
unsafe {
fmap.sync(self.node_ptr, fmap_info.base, range.start, (range.end - range.start) as usize, tx)?;
fmap.sync(
self.node_ptr,
fmap_info.base,
range.start,
(range.end - range.start) as usize,
tx,
)?;
}
}
}
@@ -615,9 +672,15 @@ impl range_tree::Value for Fmap {
Err(self)
}
}
fn split(self, prev_range: Option<core::ops::Range<Self::K>>, range: core::ops::Range<Self::K>, next_range: Option<core::ops::Range<Self::K>>) -> (Option<Self>, Self, Option<Self>) {
#[allow(unused_variables)]
fn split(
self,
prev_range: Option<core::ops::Range<Self::K>>,
range: core::ops::Range<Self::K>,
next_range: Option<core::ops::Range<Self::K>>,
) -> (Option<Self>, Self, Option<Self>) {
(
prev_range.map(|range| Fmap {
prev_range.map(|_range| Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: 0,
@@ -625,9 +688,13 @@ impl range_tree::Value for Fmap {
Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: if next_range.is_none() { self.last_page_tail } else { 0 },
last_page_tail: if next_range.is_none() {
self.last_page_tail
} else {
0
},
},
next_range.map(|range| Fmap {
next_range.map(|_range| Fmap {
rc: self.rc,
flags: self.flags,
last_page_tail: self.last_page_tail,
......
@@ -3,17 +3,23 @@ use std::str;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
use syscall::{EBADFD, MunmapFlags};
use syscall::data::{Map, Stat, StatVfs, TimeSpec};
use redox_scheme::{CallerCtx, OpenResult, SchemeMut};
use syscall::data::{Stat, StatVfs, TimeSpec};
use syscall::error::{
Error, Result, EACCES, EBADF, EBUSY, EEXIST, EINVAL, EISDIR, ELOOP, ENOENT, ENOTDIR, ENOTEMPTY,
EPERM, EXDEV,
};
use syscall::flag::{
EventFlags, MapFlags, MODE_PERM, O_ACCMODE, O_CREAT, O_DIRECTORY, O_EXCL, O_NOFOLLOW, O_RDONLY,
O_RDWR, O_STAT, O_SYMLINK, O_TRUNC, O_WRONLY,
EventFlags, MapFlags, O_ACCMODE, O_CREAT, O_DIRECTORY, O_EXCL, O_NOFOLLOW, O_RDONLY, O_RDWR,
O_STAT, O_SYMLINK, O_TRUNC, O_WRONLY,
};
use syscall::schemev2::NewFdFlags;
use syscall::{MunmapFlags, EBADFD};
use redox_path::{
canonicalize_to_standard, canonicalize_using_cwd, canonicalize_using_scheme, scheme_path,
RedoxPath,
};
use syscall::scheme::SchemeMut;
use crate::{Disk, FileSystem, Node, Transaction, TreeData, TreePtr, BLOCK_SIZE};
@@ -21,7 +27,7 @@ use super::resource::{DirResource, FileResource, Resource};
pub struct FileScheme<D: Disk> {
name: String,
fs: FileSystem<D>,
pub(crate) fs: FileSystem<D>,
next_id: AtomicUsize,
files: BTreeMap<usize, Box<dyn Resource<D>>>,
fmap: super::resource::Fmaps,
@@ -43,15 +49,21 @@ impl<D: Disk> FileScheme<D> {
tx: &mut Transaction<D>,
uid: u32,
gid: u32,
url: &str,
full_path: &str,
node: TreeData<Node>,
nodes: &mut Vec<(TreeData<Node>, String)>,
) -> Result<Vec<u8>> {
) -> Result<String> {
let atime = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
// symbolic link is relative to this part of the url
let mut working_dir =
dirname(full_path).unwrap_or(scheme_path(scheme_name).ok_or(Error::new(EINVAL))?);
// node of the link
let mut node = node;
for _ in 0..32 {
// XXX What should the limit be?
assert!(node.data().is_symlink());
let mut buf = [0; 4096];
let count = tx.read_node(
node.ptr(),
@@ -60,24 +72,30 @@ impl<D: Disk> FileScheme<D> {
atime.as_secs(),
atime.subsec_nanos(),
)?;
let scheme = format!("{}:", scheme_name);
let canon = canonicalize(url.as_bytes(), &buf[0..count]);
let path = str::from_utf8(&canon[scheme.len()..])
.unwrap_or("")
.trim_matches('/');
let target = canonicalize_to_standard(
Some(&working_dir),
str::from_utf8(&buf[..count]).or(Err(Error::new(EINVAL)))?,
)
.ok_or(Error::new(EINVAL))?;
let target_as_path = RedoxPath::from_absolute(&target).ok_or(Error::new(EINVAL))?;
let (scheme, reference) = target_as_path.as_parts().ok_or(Error::new(EINVAL))?;
if scheme.as_ref() != scheme_name {
return Err(Error::new(EXDEV));
}
let target_reference = reference.to_string();
nodes.clear();
if let Some((next_node, next_node_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, nodes)?
Self::path_nodes(scheme_name, tx, &target_reference, uid, gid, nodes)?
{
if !next_node.data().is_symlink() {
if canon.starts_with(scheme.as_bytes()) {
nodes.push((next_node, next_node_name));
return Ok(canon[scheme.len()..].to_vec());
} else {
return Err(Error::new(EXDEV));
}
nodes.push((next_node, next_node_name));
return Ok(target_reference);
}
node = next_node;
working_dir = dirname(&target).ok_or(Error::new(EINVAL))?.to_string();
} else {
return Err(Error::new(ENOENT));
}
@@ -141,78 +159,15 @@ impl<D: Disk> FileScheme<D> {
}
}
/// Make a relative path absolute
/// Given a cwd of "scheme:/path"
/// This function will turn "foo" into "scheme:/path/foo"
/// "/foo" will turn into "scheme:/foo"
/// "bar:/foo" will be used directly, as it is already absolute
pub fn canonicalize(current: &[u8], path: &[u8]) -> Vec<u8> {
// This function is modified from a version in the kernel
let mut canon = if path.iter().position(|&b| b == b':').is_none() {
let cwd = &current[0..current.iter().rposition(|x| *x == '/' as u8).unwrap_or(0)];
let mut canon = if !path.starts_with(b"/") {
let mut c = cwd.to_vec();
if !c.ends_with(b"/") {
c.push(b'/');
}
c
} else {
cwd[..cwd.iter().position(|&b| b == b':').map_or(1, |i| i + 1)].to_vec()
};
canon.extend_from_slice(&path);
canon
} else {
path.to_vec()
};
// NOTE: assumes the scheme does not include anything like "../" or "./"
let mut result = {
let parts = canon
.split(|&c| c == b'/')
.filter(|&part| part != b".")
.rev()
.scan(0, |nskip, part| {
if part == b"." {
Some(None)
} else if part == b".." {
*nskip += 1;
Some(None)
} else {
if *nskip > 0 {
*nskip -= 1;
Some(None)
} else {
Some(Some(part))
}
}
})
.filter_map(|x| x)
.collect::<Vec<_>>();
parts.iter().rev().fold(Vec::new(), |mut vec, &part| {
vec.extend_from_slice(part);
vec.push(b'/');
vec
})
};
result.pop(); // remove extra '/'
// replace with the root of the scheme if it's empty
if result.len() == 0 {
let pos = canon
.iter()
.position(|&b| b == b':')
.map_or(canon.len(), |p| p + 1);
canon.truncate(pos);
canon
} else {
result
}
/// given a path with a scheme, return the containing directory (or scheme)
fn dirname(path: &str) -> Option<String> {
canonicalize_using_cwd(Some(path), "..")
}
impl<D: Disk> SchemeMut for FileScheme<D> {
fn open(&mut self, url: &str, flags: usize, uid: u32, gid: u32) -> Result<usize> {
fn xopen(&mut self, url: &str, flags: usize, ctx: &CallerCtx) -> Result<OpenResult> {
let CallerCtx { uid, gid, .. } = *ctx;
let path = url.trim_matches('/');
// println!("Open '{}' {:X}", path, flags);
@@ -272,20 +227,20 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
&& flags & O_SYMLINK != O_SYMLINK
{
let mut resolve_nodes = Vec::new();
let full_path =
canonicalize_using_scheme(scheme_name, url).ok_or(Error::new(EINVAL))?;
let resolved = self.fs.tx(|tx| {
Self::resolve_symlink(
scheme_name,
tx,
uid,
gid,
&format!("{}:/{}", scheme_name, url),
&full_path,
node,
&mut resolve_nodes,
)
})?;
let resolved_utf8 =
str::from_utf8(&resolved).map_err(|_| Error::new(EINVAL))?;
return self.open(resolved_utf8, flags, uid, gid);
return self.xopen(&resolved, flags, ctx);
} else if !node.data().is_symlink() && flags & O_SYMLINK == O_SYMLINK {
return Err(Error::new(EINVAL));
} else {
@@ -401,41 +356,17 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
}
}
};
self.fmap.entry(resource.node_ptr().id()).or_insert_with(Default::default).open_fds += 1;
self.fmap
.entry(resource.node_ptr().id())
.or_insert_with(Default::default)
.open_fds += 1;
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
self.files.insert(id, resource);
Ok(id)
}
fn chmod(&mut self, url: &str, mode: u16, uid: u32, gid: u32) -> Result<usize> {
let path = url.trim_matches('/');
// println!("Chmod '{}'", path);
let scheme_name = &self.name;
self.fs.tx(|tx| {
let mut nodes = Vec::new();
if let Some((mut node, _node_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes)?
{
if node.data().uid() == uid || uid == 0 {
let old_mode = node.data().mode();
let new_mode = (old_mode & !MODE_PERM) | (mode & MODE_PERM);
if old_mode != new_mode {
node.data_mut().set_mode(new_mode);
tx.sync_tree(node)?;
}
Ok(0)
} else {
Err(Error::new(EPERM))
}
} else {
Err(Error::new(ENOENT))
}
Ok(OpenResult::ThisScheme {
number: id,
flags: NewFdFlags::POSITIONED,
})
}
@@ -447,31 +378,32 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
let scheme_name = &self.name;
self.fs.tx(|tx| {
let mut nodes = Vec::new();
if let Some((child, child_name)) =
let Some((child, child_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes)?
{
if let Some((parent, _parent_name)) = nodes.last() {
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
else {
return Err(Error::new(ENOENT));
};
if child.data().is_dir() {
if !child.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
let Some((parent, _parent_name)) = nodes.last() else {
return Err(Error::new(EPERM));
};
tx.remove_node(parent.ptr(), &child_name, Node::MODE_DIR)
.and(Ok(0))
} else {
Err(Error::new(ENOTDIR))
}
} else {
Err(Error::new(EPERM))
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
if child.data().is_dir() {
if !child.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
tx.remove_node(parent.ptr(), &child_name, Node::MODE_DIR)
.and(Ok(0))
} else {
Err(Error::new(ENOENT))
Err(Error::new(ENOTDIR))
}
})
}
@@ -485,38 +417,36 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
self.fs.tx(|tx| {
let mut nodes = Vec::new();
// TODO: Clean up indentation using let-else, possibly elsewhere too.
if let Some((child, child_name)) =
let Some((child, child_name)) =
Self::path_nodes(scheme_name, tx, path, uid, gid, &mut nodes)?
{
if let Some((parent, _parent_name)) = nodes.last() {
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
else {
return Err(Error::new(ENOENT));
};
if !child.data().is_dir() {
if child.data().uid() != uid && uid != 0 {
// println!("file not owned by current user {}", parent.1.uid);
return Err(Error::new(EACCES));
}
let Some((parent, _parent_name)) = nodes.last() else {
return Err(Error::new(EPERM));
};
if child.data().is_symlink() {
tx.remove_node(parent.ptr(), &child_name, Node::MODE_SYMLINK)
.and(Ok(0))
} else {
tx.remove_node(parent.ptr(), &child_name, Node::MODE_FILE)
.and(Ok(0))
}
} else {
Err(Error::new(EISDIR))
}
if !parent.data().permission(uid, gid, Node::MODE_WRITE) {
// println!("dir not writable {:o}", parent.1.mode);
return Err(Error::new(EACCES));
}
if !child.data().is_dir() {
if child.data().uid() != uid && uid != 0 {
// println!("file not owned by current user {}", parent.1.uid);
return Err(Error::new(EACCES));
}
if child.data().is_symlink() {
tx.remove_node(parent.ptr(), &child_name, Node::MODE_SYMLINK)
.and(Ok(0))
} else {
Err(Error::new(EPERM))
tx.remove_node(parent.ptr(), &child_name, Node::MODE_FILE)
.and(Ok(0))
}
} else {
Err(Error::new(ENOENT))
Err(Error::new(EISDIR))
}
})
}
@@ -536,39 +466,32 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
return Err(Error::new(EBADF));
};
self.fmap.get_mut(&resource.node_ptr().id()).ok_or(Error::new(EBADFD))?.open_fds += 1;
self.fmap
.get_mut(&resource.node_ptr().id())
.ok_or(Error::new(EBADFD))?
.open_fds += 1;
let id = self.next_id.fetch_add(1, Ordering::SeqCst);
self.files.insert(id, resource);
Ok(id)
}
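Both `xopen` and `dup` hand out file ids from the same atomic counter; `Relaxed` ordering (used in `xopen` above) is sufficient for uniqueness because nothing else synchronizes through the counter. The pattern reduced to a sketch:

```rust
// Sketch of the id-allocation pattern: fetch_add yields unique,
// monotonically increasing ids even under concurrent callers.
use std::sync::atomic::{AtomicUsize, Ordering};

static NEXT_ID: AtomicUsize = AtomicUsize::new(0);

fn allocate_id() -> usize {
    NEXT_ID.fetch_add(1, Ordering::Relaxed)
}
```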
#[allow(unused_variables)]
fn read(&mut self, id: usize, buf: &mut [u8]) -> Result<usize> {
fn read(&mut self, id: usize, buf: &mut [u8], offset: u64, _fcntl_flags: u32) -> Result<usize> {
// println!("Read {}, {:X} {}", id, buf.as_ptr() as usize, buf.len());
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.read(buf, tx))
} else {
Err(Error::new(EBADF))
}
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.read(buf, offset, tx))
}
fn write(&mut self, id: usize, buf: &[u8]) -> Result<usize> {
fn write(&mut self, id: usize, buf: &[u8], offset: u64, _fcntl_flags: u32) -> Result<usize> {
// println!("Write {}, {:X} {}", id, buf.as_ptr() as usize, buf.len());
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.write(buf, tx))
} else {
Err(Error::new(EBADF))
}
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.write(buf, offset, tx))
}
fn seek(&mut self, id: usize, pos: isize, whence: usize) -> Result<isize> {
fn fsize(&mut self, id: usize) -> Result<u64> {
// println!("Seek {}, {} {}", id, pos, whence);
if let Some(file) = self.files.get_mut(&id) {
self.fs.tx(|tx| file.seek(pos, whence, tx))
} else {
Err(Error::new(EBADF))
}
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
self.fs.tx(|tx| file.fsize(tx))
}
fn fchmod(&mut self, id: usize, mode: u16) -> Result<usize> {
@@ -784,14 +707,13 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
}
fn mmap_prep(&mut self, id: usize, offset: u64, size: usize, flags: MapFlags) -> Result<usize> {
println!("Mmap {}, {:?} {} {}", id, flags, size, offset);
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
let fmaps = &mut self.fmap;
self.fs.tx(|tx| file.fmap(fmaps, flags, size, offset, tx))
}
#[allow(unused_variables)]
fn munmap(&mut self, id: usize, offset: u64, size: usize, flags: MunmapFlags) -> Result<usize> {
println!("Munmap {}, {} {}", id, size, offset);
let file = self.files.get_mut(&id).ok_or(Error::new(EBADF))?;
let fmaps = &mut self.fmap;
@@ -801,9 +723,15 @@ impl<D: Disk> SchemeMut for FileScheme<D> {
fn close(&mut self, id: usize) -> Result<usize> {
// println!("Close {}", id);
let file = self.files.remove(&id).ok_or(Error::new(EBADF))?;
let file_info = self.fmap.get_mut(&file.node_ptr().id()).ok_or(Error::new(EBADFD))?;
file_info.open_fds = file_info.open_fds.checked_sub(1).expect("open_fds not tracked correctly");
let file_info = self
.fmap
.get_mut(&file.node_ptr().id())
.ok_or(Error::new(EBADFD))?;
file_info.open_fds = file_info
.open_fds
.checked_sub(1)
.expect("open_fds not tracked correctly");
// TODO: If open_fds reaches zero and there are no hardlinks (directory entries) to any
// particular inode, remove that inode here.
......
use core::{fmt, mem, ops, slice};
use simple_endian::*;
use endian_num::Le;
use crate::{BlockList, BlockPtr, BlockRaw};
use crate::{BlockLevel, BlockList, BlockPtr, BlockTrait, RecordRaw, BLOCK_SIZE, RECORD_LEVEL};
/// An index into a [`Node`]'s block table.
pub enum NodeLevel {
L0(usize),
L1(usize, usize),
@@ -12,61 +13,66 @@ pub enum NodeLevel {
}
impl NodeLevel {
// Warning: this uses constant block offsets, make sure to sync with Node
pub fn new(mut block_offset: u64) -> Option<Self> {
// Warning: this uses constant record offsets, make sure to sync with Node
/// Return the [`NodeLevel`] of the record with the given index.
/// - the first 128 are level 0,
/// - the next 64*256 are level 1,
/// - ...and so on.
pub fn new(mut record_offset: u64) -> Option<Self> {
// 1 << 8 = 256, this is the number of entries in a BlockList
const SHIFT: u64 = 8;
const NUM: u64 = 1 << SHIFT;
const MASK: u64 = NUM - 1;
const L0: u64 = 128;
if block_offset < L0 {
return Some(Self::L0((block_offset & MASK) as usize));
if record_offset < L0 {
return Some(Self::L0((record_offset & MASK) as usize));
} else {
block_offset -= L0;
record_offset -= L0;
}
const L1: u64 = 64 * NUM;
if block_offset < L1 {
if record_offset < L1 {
return Some(Self::L1(
((block_offset >> SHIFT) & MASK) as usize,
(block_offset & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
block_offset -= L1;
record_offset -= L1;
}
const L2: u64 = 32 * NUM * NUM;
if block_offset < L2 {
if record_offset < L2 {
return Some(Self::L2(
((block_offset >> (2 * SHIFT)) & MASK) as usize,
((block_offset >> SHIFT) & MASK) as usize,
(block_offset & MASK) as usize,
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
block_offset -= L2;
record_offset -= L2;
}
const L3: u64 = 16 * NUM * NUM * NUM;
if block_offset < L3 {
if record_offset < L3 {
return Some(Self::L3(
((block_offset >> (3 * SHIFT)) & MASK) as usize,
((block_offset >> (2 * SHIFT)) & MASK) as usize,
((block_offset >> SHIFT) & MASK) as usize,
(block_offset & MASK) as usize,
((record_offset >> (3 * SHIFT)) & MASK) as usize,
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
));
} else {
block_offset -= L3;
record_offset -= L3;
}
const L4: u64 = 12 * NUM * NUM * NUM * NUM;
if block_offset < L4 {
if record_offset < L4 {
Some(Self::L4(
((block_offset >> (4 * SHIFT)) & MASK) as usize,
((block_offset >> (3 * SHIFT)) & MASK) as usize,
((block_offset >> (2 * SHIFT)) & MASK) as usize,
((block_offset >> SHIFT) & MASK) as usize,
(block_offset & MASK) as usize,
((record_offset >> (4 * SHIFT)) & MASK) as usize,
((record_offset >> (3 * SHIFT)) & MASK) as usize,
((record_offset >> (2 * SHIFT)) & MASK) as usize,
((record_offset >> SHIFT) & MASK) as usize,
(record_offset & MASK) as usize,
))
} else {
None
@@ -74,38 +80,86 @@ impl NodeLevel {
}
}
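Concretely, the first 128 record offsets stay at level 0, offset 128 rolls over into the first level-one table, and each deeper level multiplies capacity by 256. A few boundary checks derived from the constants above:

```rust
#[test]
fn node_level_boundaries() {
    assert!(matches!(NodeLevel::new(0), Some(NodeLevel::L0(0))));
    assert!(matches!(NodeLevel::new(127), Some(NodeLevel::L0(127))));
    // 128 is the first record behind a level-one table.
    assert!(matches!(NodeLevel::new(128), Some(NodeLevel::L1(0, 0))));
    // After 128 direct and 64 * 256 level-one records, level two begins.
    assert!(matches!(NodeLevel::new(128 + 64 * 256), Some(NodeLevel::L2(0, 0, 0))));
}
```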
type BlockListL1 = BlockList<BlockRaw>;
type BlockListL1 = BlockList<RecordRaw>;
type BlockListL2 = BlockList<BlockListL1>;
type BlockListL3 = BlockList<BlockListL2>;
type BlockListL4 = BlockList<BlockListL3>;
/// A file/folder node
#[repr(packed)]
#[repr(C, packed)]
pub struct Node {
pub mode: u16le,
pub uid: u32le,
pub gid: u32le,
pub links: u32le,
pub size: u64le,
pub ctime: u64le,
pub ctime_nsec: u32le,
pub mtime: u64le,
pub mtime_nsec: u32le,
pub atime: u64le,
pub atime_nsec: u32le,
pub padding: [u8; 6],
// 128 * BLOCK_SIZE (512 KiB, 4 KiB each)
pub level0: [BlockPtr<BlockRaw>; 128],
// 64 * 256 * BLOCK_SIZE (64 MiB, 1 MiB each)
/// This node's type & permissions.
/// - the first four bits are the node type
/// - the remaining twelve bits are the permission bits (`MODE_PERM`):
///   rwx triplets for the file's user, group, and everyone else,
///   plus the setuid/setgid/sticky bits
pub mode: Le<u16>,
/// The uid that owns this file
pub uid: Le<u32>,
/// The gid that owns this file
pub gid: Le<u32>,
/// The number of links to this file
/// (directory entries, symlinks, etc)
pub links: Le<u32>,
/// The length of this file, in bytes
pub size: Le<u64>,
pub ctime: Le<u64>,
pub ctime_nsec: Le<u32>,
pub mtime: Le<u64>,
pub mtime_nsec: Le<u32>,
pub atime: Le<u64>,
pub atime_nsec: Le<u32>,
pub record_level: Le<u32>,
pub padding: [u8; BLOCK_SIZE as usize - 4094],
/// The first 128 blocks of this file.
///
/// Total size: 128 * RECORD_SIZE (16 MiB, 128 KiB each)
pub level0: [BlockPtr<RecordRaw>; 128],
/// The next 64 * 256 blocks of this file,
/// stored behind 64 level one tables.
///
/// Total size: 64 * 256 * RECORD_SIZE (2 GiB, 32 MiB each)
pub level1: [BlockPtr<BlockListL1>; 64],
// 32 * 256 * 256 * BLOCK_SIZE (8 GiB, 256 MiB each)
/// The next 32 * 256 * 256 blocks of this file,
/// stored behind 32 level two tables.
/// Each level two table points to 256 level one tables.
///
/// Total size: 32 * 256 * 256 * RECORD_SIZE (256 GiB, 8 GiB each)
pub level2: [BlockPtr<BlockListL2>; 32],
// 16 * 256 * 256 * 256 * BLOCK_SIZE (1 TiB, 64 GiB each)
/// The next 16 * 256 * 256 * 256 blocks of this file,
/// stored behind 16 level three tables.
///
/// Total size: 16 * 256 * 256 * 256 * RECORD_SIZE (32 TiB, 2 TiB each)
pub level3: [BlockPtr<BlockListL3>; 16],
// 12 * 256 * 256 * 256 * 256 * BLOCK_SIZE (192 TiB, 16 TiB each)
/// The next 12 * 256 * 256 * 256 * 256 blocks of this file,
/// stored behind 12 level four tables.
///
/// Total size: 12 * 256 * 256 * 256 * 256 * RECORD_SIZE (6 PiB, 512 TiB each)
pub level4: [BlockPtr<BlockListL4>; 12],
}
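Because a `Node` is written to disk verbatim, the layout must fill one block exactly: 62 bytes of metadata, the 2 bytes of padding implied by `BLOCK_SIZE as usize - 4094`, and 252 block pointers at 16 bytes each (assuming the two-`u64` `BlockPtr` of this revision) sum to 4096. A layout check in the spirit of the crate's other size tests:

```rust
#[test]
fn node_size_test() {
    // A Node must occupy exactly one block on disk.
    assert_eq!(core::mem::size_of::<Node>(), BLOCK_SIZE as usize);
}
```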
unsafe impl BlockTrait for Node {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 == 0 {
Some(Self::default())
} else {
None
}
}
}
impl Default for Node {
fn default() -> Self {
Self {
@@ -120,7 +174,8 @@ impl Default for Node {
mtime_nsec: 0.into(),
atime: 0.into(),
atime_nsec: 0.into(),
padding: [0; 6],
record_level: 0.into(),
padding: [0; BLOCK_SIZE as usize - 4094],
level0: [BlockPtr::default(); 128],
level1: [BlockPtr::default(); 64],
level2: [BlockPtr::default(); 32],
@@ -136,11 +191,13 @@ impl Node {
pub const MODE_DIR: u16 = 0x4000;
pub const MODE_SYMLINK: u16 = 0xA000;
/// Mask for node permission bits
pub const MODE_PERM: u16 = 0x0FFF;
pub const MODE_EXEC: u16 = 0o1;
pub const MODE_WRITE: u16 = 0o2;
pub const MODE_READ: u16 = 0o4;
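Putting the masks together: the high nibble selects the node type and the low twelve bits carry the permissions, so a mode decomposes with the constants above. A small check, assuming `MODE_TYPE` masks the top four bits as its uses below imply:

```rust
#[test]
fn mode_layout() {
    let mode: u16 = Node::MODE_FILE | 0o644; // regular file, rw-r--r--
    assert_eq!(mode & Node::MODE_TYPE, Node::MODE_FILE); // top nibble: type
    assert_eq!(mode & Node::MODE_PERM, 0o644); // low 12 bits: permissions
    assert_ne!(mode & (Node::MODE_READ << 6), 0); // owner read bit (0o400)
}
```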
/// Create a new, empty node with the given metadata
pub fn new(mode: u16, uid: u32, gid: u32, ctime: u64, ctime_nsec: u32) -> Self {
Self {
mode: mode.into(),
@@ -153,40 +210,62 @@ impl Node {
mtime_nsec: ctime_nsec.into(),
atime: ctime.into(),
atime_nsec: ctime_nsec.into(),
record_level: if mode & Self::MODE_TYPE == Self::MODE_FILE {
// Files take on record level
RECORD_LEVEL as u32
} else {
// Folders do not
0
}
.into(),
..Default::default()
}
}
/// This node's type & permissions.
/// - the first four bits are the node type
/// - the remaining twelve bits are the permission bits (`MODE_PERM`):
///   rwx triplets for the file's user, group, and everyone else,
///   plus the setuid/setgid/sticky bits
pub fn mode(&self) -> u16 {
{ self.mode }.to_native()
self.mode.to_ne()
}
/// The uid that owns this file
pub fn uid(&self) -> u32 {
{ self.uid }.to_native()
self.uid.to_ne()
}
/// The gid that owns this file
pub fn gid(&self) -> u32 {
{ self.gid }.to_native()
self.gid.to_ne()
}
/// The number of links to this file
/// (directory entries, symlinks, etc)
pub fn links(&self) -> u32 {
{ self.links }.to_native()
self.links.to_ne()
}
/// The length of this file, in bytes.
pub fn size(&self) -> u64 {
{ self.size }.to_native()
self.size.to_ne()
}
pub fn ctime(&self) -> (u64, u32) {
({ self.ctime }.to_native(), { self.ctime_nsec }.to_native())
(self.ctime.to_ne(), self.ctime_nsec.to_ne())
}
pub fn mtime(&self) -> (u64, u32) {
({ self.mtime }.to_native(), { self.mtime_nsec }.to_native())
(self.mtime.to_ne(), self.mtime_nsec.to_ne())
}
pub fn atime(&self) -> (u64, u32) {
({ self.atime }.to_native(), { self.atime_nsec }.to_native())
(self.atime.to_ne(), self.atime_nsec.to_ne())
}
pub fn record_level(&self) -> BlockLevel {
BlockLevel(self.record_level.to_ne() as usize)
}
pub fn set_mode(&mut self, mode: u16) {
......
use alloc::{boxed::Box, vec};
use core::ops;
use crate::{BlockLevel, BlockTrait, RECORD_LEVEL};
//TODO: this is a box to prevent stack overflows
pub struct RecordRaw(Box<[u8]>);
unsafe impl BlockTrait for RecordRaw {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 <= RECORD_LEVEL {
Some(Self(vec![0; level.bytes() as usize].into_boxed_slice()))
} else {
None
}
}
}
impl Clone for RecordRaw {
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
impl ops::Deref for RecordRaw {
type Target = [u8];
fn deref(&self) -> &[u8] {
&self.0
}
}
impl ops::DerefMut for RecordRaw {
fn deref_mut(&mut self) -> &mut [u8] {
&mut self.0
}
}
#[test]
fn record_raw_size_test() {
for level_i in 0..RECORD_LEVEL {
let level = BlockLevel(level_i);
assert_eq!(
RecordRaw::empty(level).unwrap().len(),
level.bytes() as usize
);
}
}
use std::ops::DerefMut;
use crate::{unmount_path, DiskSparse, FileSystem, Node, TreePtr, ALLOC_GC_THRESHOLD};
use std::path::Path;
use std::process::Command;
use std::{fs, sync, thread, time};
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::Relaxed;
use std::{fs, thread, time};
use crate::{unmount_path, DiskSparse, FileSystem};
static IMAGE_SEQ: AtomicUsize = AtomicUsize::new(0);
fn with_redoxfs<T, F>(callback: F) -> T
where
T: Send + Sync + 'static,
F: FnMut(&Path) -> T + Send + Sync + 'static,
F: FnOnce(FileSystem<DiskSparse>) -> T + Send + Sync + 'static,
{
let disk_path = "image.bin";
let mount_path = "image";
let disk_path = format!("image{}.bin", IMAGE_SEQ.fetch_add(1, Relaxed));
let res = {
let disk = DiskSparse::create(dbg!(disk_path), 1024 * 1024 * 1024).unwrap();
if cfg!(not(target_os = "redox")) {
if !Path::new(mount_path).exists() {
dbg!(fs::create_dir(dbg!(mount_path))).unwrap();
}
}
let disk = DiskSparse::create(dbg!(&disk_path), 1024 * 1024 * 1024).unwrap();
let ctime = dbg!(time::SystemTime::now().duration_since(time::UNIX_EPOCH)).unwrap();
let fs = FileSystem::create(disk, None, ctime.as_secs(), ctime.subsec_nanos()).unwrap();
let callback_mutex = sync::Arc::new(sync::Mutex::new(callback));
callback(fs)
};
dbg!(fs::remove_file(dbg!(disk_path))).unwrap();
res
}
fn with_mounted<T, F>(callback: F) -> T
where
T: Send + Sync + 'static,
F: FnOnce(&Path) -> T + Send + Sync + 'static,
{
let mount_path_o = format!("image{}", IMAGE_SEQ.fetch_add(1, Relaxed));
let mount_path = mount_path_o.clone();
let res = with_redoxfs(move |fs| {
if cfg!(not(target_os = "redox")) {
if !Path::new(&mount_path).exists() {
dbg!(fs::create_dir(dbg!(&mount_path))).unwrap();
}
}
let join_handle = crate::mount(fs, dbg!(mount_path), move |real_path| {
let callback_mutex = callback_mutex.clone();
let real_path = real_path.to_owned();
thread::spawn(move || {
let res = {
let mut callback_guard = callback_mutex.lock().unwrap();
let callback = callback_guard.deref_mut();
callback(&real_path)
};
let res = callback(&real_path);
let real_path = real_path.to_str().unwrap();
if cfg!(target_os = "redox") {
dbg!(fs::remove_file(dbg!(format!(":{}", mount_path)))).unwrap();
dbg!(fs::remove_file(dbg!(format!(":{}", real_path)))).unwrap();
} else {
if !dbg!(Command::new("sync").status()).unwrap().success() {
panic!("sync failed");
}
if !unmount_path(mount_path).is_ok() {
if !unmount_path(real_path).is_ok() {
panic!("umount failed");
}
}
@@ -54,12 +66,10 @@ where
.unwrap();
join_handle.join().unwrap()
};
dbg!(fs::remove_file(dbg!(disk_path))).unwrap();
});
if cfg!(not(target_os = "redox")) {
dbg!(fs::remove_dir(dbg!(mount_path))).unwrap();
dbg!(fs::remove_dir(dbg!(mount_path_o))).unwrap();
}
res
@@ -67,7 +77,7 @@ where
#[test]
fn simple() {
with_redoxfs(|path| {
with_mounted(|path| {
dbg!(fs::create_dir(&path.join("test"))).unwrap();
})
}
@@ -78,36 +88,36 @@ fn mmap() {
use syscall;
//TODO
with_redoxfs(|path| {
with_mounted(|path| {
use std::slice;
let path = dbg!(path.join("test"));
let mmap_inner = |write: bool| {
let fd = dbg!(syscall::open(
let fd = dbg!(libredox::call::open(
path.to_str().unwrap(),
syscall::O_CREAT | syscall::O_RDWR | syscall::O_CLOEXEC
libredox::flag::O_CREAT | libredox::flag::O_RDWR | libredox::flag::O_CLOEXEC,
0,
))
.unwrap();
let map = unsafe {
slice::from_raw_parts_mut(
dbg!(syscall::fmap(
dbg!(libredox::call::mmap(libredox::call::MmapArgs {
fd,
&syscall::Map {
offset: 0,
size: 128,
flags: syscall::PROT_READ | syscall::PROT_WRITE,
address: 0,
}
))
offset: 0,
length: 128,
prot: libredox::flag::PROT_READ | libredox::flag::PROT_WRITE,
flags: libredox::flag::MAP_SHARED,
addr: core::ptr::null_mut(),
}))
.unwrap() as *mut u8,
128,
)
};
// Maps should be available after closing
assert_eq!(dbg!(syscall::close(fd)), Ok(0));
assert_eq!(dbg!(libredox::call::close(fd)), Ok(()));
for i in 0..128 {
if write {
@@ -119,8 +129,8 @@ fn mmap() {
//TODO: add msync
unsafe {
assert_eq!(
dbg!(syscall::funmap(map.as_mut_ptr() as usize, map.len())),
Ok(0)
dbg!(libredox::call::munmap(map.as_mut_ptr().cast(), map.len())),
Ok(())
);
}
};
@@ -129,3 +139,54 @@ fn mmap() {
mmap_inner(false);
})
}
#[test]
fn create_remove_should_not_increase_size() {
with_redoxfs(|mut fs| {
let initially_free = fs.allocator().free();
let tree_ptr = TreePtr::<Node>::root();
let name = "test";
let _ = fs
.tx(|tx| {
tx.create_node(tree_ptr, name, Node::MODE_FILE | 0o644, 1, 0)?;
tx.remove_node(tree_ptr, name, Node::MODE_FILE)
})
.unwrap();
assert_eq!(fs.allocator().free(), initially_free);
});
}
#[test]
fn many_create_remove_should_not_increase_size() {
with_redoxfs(|mut fs| {
let initially_free = fs.allocator().free();
let tree_ptr = TreePtr::<Node>::root();
let name = "test";
// Iterate over 255 times to prove deleted files don't retain space within the node tree
// Iterate to an ALLOC_GC_THRESHOLD boundary to ensure the allocator GC reclaims space
let start = fs.header.generation.to_ne();
let end = start + ALLOC_GC_THRESHOLD;
let end = end - (end % ALLOC_GC_THRESHOLD) + 1 + ALLOC_GC_THRESHOLD;
for i in start..end {
let _ = fs
.tx(|tx| {
tx.create_node(
tree_ptr,
&format!("{}{}", name, i),
Node::MODE_FILE | 0o644,
1,
0,
)?;
tx.remove_node(tree_ptr, &format!("{}{}", name, i), Node::MODE_FILE)
})
.unwrap();
}
// Any value greater than 0 indicates a storage leak
let diff = initially_free - fs.allocator().free();
assert_eq!(diff, 0);
});
}
use alloc::{
boxed::Box,
collections::{BTreeMap, VecDeque},
vec::Vec,
};
@@ -12,9 +13,9 @@ use syscall::error::{
};
use crate::{
AllocEntry, AllocList, Allocator, BlockData, BlockPtr, BlockRaw, DirEntry, DirList, Disk,
FileSystem, Header, Node, NodeLevel, TreeData, TreePtr, ALLOC_LIST_ENTRIES, BLOCK_SIZE,
HEADER_RING,
AllocEntry, AllocList, Allocator, BlockAddr, BlockData, BlockLevel, BlockPtr, BlockTrait,
DirEntry, DirList, Disk, FileSystem, Header, Node, NodeLevel, RecordRaw, TreeData, TreePtr,
ALLOC_GC_THRESHOLD, ALLOC_LIST_ENTRIES, DIR_ENTRY_MAX_LENGTH, HEADER_RING,
};
pub struct Transaction<'a, D: Disk> {
@@ -25,13 +26,13 @@ pub struct Transaction<'a, D: Disk> {
pub header_changed: bool,
allocator: Allocator,
allocator_log: VecDeque<AllocEntry>,
deallocate: Vec<u64>,
write_cache: BTreeMap<u64, BlockRaw>,
deallocate: Vec<BlockAddr>,
write_cache: BTreeMap<BlockAddr, Box<[u8]>>,
}
impl<'a, D: Disk> Transaction<'a, D> {
pub(crate) fn new(fs: &'a mut FileSystem<D>) -> Self {
let header = fs.header.clone();
let header = fs.header;
let allocator = fs.allocator.clone();
Self {
fs,
@@ -51,30 +52,38 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(())
}
// Unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn allocate(&mut self) -> Result<u64> {
match self.allocator.allocate() {
//
// MARK: block operations
//
/// Allocate a new block of size `level`, returning its address.
/// - returns `Err(ENOSPC)` if a block of this size could not be allocated.
/// - unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn allocate(&mut self, level: BlockLevel) -> Result<BlockAddr> {
match self.allocator.allocate(level) {
Some(addr) => {
self.allocator_log.push_back(AllocEntry::new(addr, -1));
self.allocator_log.push_back(AllocEntry::allocate(addr));
Ok(addr)
}
None => Err(Error::new(ENOSPC)),
}
}
// Unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn deallocate(&mut self, addr: u64) {
/// Deallocate the given block.
/// - unsafe because order must be done carefully and changes must be flushed to disk
unsafe fn deallocate(&mut self, addr: BlockAddr) {
//TODO: should we use some sort of not-null abstraction?
assert!(addr != 0);
assert!(!addr.is_null());
// Remove from write_cache if it is there, since it no longer needs to be written
//TODO: for larger blocks do we need to check for sub-blocks in here?
self.write_cache.remove(&addr);
// Search and remove the last matching entry in allocator_log
let mut found = false;
for i in (0..self.allocator_log.len()).rev() {
let entry = self.allocator_log[i];
if entry.addr() == addr && entry.count() == -1 {
if entry.index() == addr.index() && entry.count() == -addr.level().blocks() {
found = true;
self.allocator_log.remove(i);
break;
@@ -90,7 +99,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
}
fn deallocate_block<T>(&mut self, ptr: BlockPtr<T>) {
fn deallocate_block<T: BlockTrait>(&mut self, ptr: BlockPtr<T>) {
if !ptr.is_null() {
unsafe {
self.deallocate(ptr.addr());
@@ -98,28 +107,39 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
}
fn sync_allocator(&mut self, squash: bool) -> Result<bool> {
/// Drain `self.allocator_log` and `self.deallocate`,
/// updating the [`AllocList`] with the resulting state.
///
/// This method does not write anything to disk,
/// all writes are cached.
///
/// To keep the allocator log from growing excessively, it will
/// periodically be fully rebuilt using the state of `self.allocator`.
/// This rebuild can be forced by setting `force_squash` to `true`.
fn sync_allocator(&mut self, force_squash: bool) -> Result<bool> {
let mut prev_ptr = BlockPtr::default();
if squash {
let should_gc = self.header.generation() % ALLOC_GC_THRESHOLD == 0
&& self.header.generation() >= ALLOC_GC_THRESHOLD;
if force_squash || should_gc {
// Clear and rebuild alloc log
self.allocator_log.clear();
let levels = self.allocator.levels();
for level in (0..levels.len()).rev() {
let count = (1 << level) as i64;
'addrs: for &addr in levels[level].iter() {
'indexs: for &index in levels[level].iter() {
for entry in self.allocator_log.iter_mut() {
if addr + count as u64 == entry.addr() {
if index + count as u64 == entry.index() {
// New entry is at start of existing entry
*entry = AllocEntry::new(addr, count + entry.count());
continue 'addrs;
} else if entry.addr() + entry.count() as u64 == addr {
*entry = AllocEntry::new(index, count + entry.count());
continue 'indexs;
} else if entry.index() + entry.count() as u64 == index {
// New entry is at end of existing entry
*entry = AllocEntry::new(entry.addr(), entry.count() + count);
continue 'addrs;
*entry = AllocEntry::new(entry.index(), entry.count() + count);
continue 'indexs;
}
}
self.allocator_log.push_back(AllocEntry::new(addr, count));
self.allocator_log.push_back(AllocEntry::new(index, count));
}
}
@@ -127,7 +147,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
let mut alloc_ptr = self.header.alloc;
while !alloc_ptr.is_null() {
let alloc = self.read_block(alloc_ptr)?;
self.deallocate_block(alloc_ptr);
self.deallocate.push(alloc.addr());
alloc_ptr = alloc.data().prev;
}
} else {
@@ -147,9 +167,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
// Prepare to deallocate old alloc block
unsafe {
self.deallocate(alloc.addr());
}
self.deallocate.push(alloc.addr());
// Link to previous alloc block
prev_ptr = alloc.data().prev;
@@ -160,18 +178,18 @@ impl<'a, D: Disk> Transaction<'a, D> {
while new_blocks.len() * ALLOC_LIST_ENTRIES
<= self.allocator_log.len() + self.deallocate.len()
{
new_blocks.push(unsafe { self.allocate()? });
new_blocks.push(unsafe { self.allocate(BlockLevel::default())? });
}
// De-allocate old blocks (after allocation to prevent re-use)
//TODO: optimize allocator log in memory
while let Some(addr) = self.deallocate.pop() {
self.allocator.deallocate(addr);
self.allocator_log.push_back(AllocEntry::new(addr, 1));
self.allocator_log.push_back(AllocEntry::deallocate(addr));
}
for new_block in new_blocks {
let mut alloc = BlockData::new(new_block, AllocList::default());
let mut alloc = BlockData::<AllocList>::empty(new_block).unwrap();
alloc.data_mut().prev = prev_ptr;
for entry in alloc.data_mut().entries.iter_mut() {
if let Some(log_entry) = self.allocator_log.pop_front() {
@@ -189,17 +207,20 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(true)
}
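The `should_gc` test near the top of `sync_allocator` fires once every `ALLOC_GC_THRESHOLD` generations but never on a freshly created filesystem, so the log is periodically rebuilt from the allocator's live state even without `force_squash`. The condition in isolation, with an illustrative threshold:

```rust
// Sketch of the periodic-squash rule; 256 is a stand-in value,
// not the real ALLOC_GC_THRESHOLD.
fn should_gc(generation: u64, threshold: u64) -> bool {
    generation % threshold == 0 && generation >= threshold
}

#[test]
fn gc_fires_on_threshold_boundaries() {
    let t = 256;
    assert!(!should_gc(0, t)); // fresh filesystem: skip
    assert!(should_gc(t, t)); // first boundary
    assert!(!should_gc(t + 1, t)); // between boundaries
    assert!(should_gc(4 * t, t)); // and every boundary after
}
```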
//TODO: change this function, provide another way to squash, only write header in commit
pub fn sync(&mut self, squash: bool) -> Result<bool> {
/// Write all changes cached in this [`Transaction`] to disk.
pub fn sync(&mut self, force_squash: bool) -> Result<bool> {
// Make sure alloc is synced
self.sync_allocator(squash)?;
self.sync_allocator(force_squash)?;
// Write all items in write cache
for (addr, raw) in self.write_cache.iter_mut() {
// sync_alloc must have changed alloc block pointer
// if we have any blocks to write
assert!(self.header_changed);
self.fs.encrypt(raw);
let count = unsafe { self.fs.disk.write_at(self.fs.block + addr, &raw)? };
if count != mem::size_of::<BlockRaw>() {
let count = unsafe { self.fs.disk.write_at(self.fs.block + addr.index(), raw)? };
if count != raw.len() {
// Wrote wrong number of bytes
#[cfg(feature = "log")]
log::error!("SYNC WRITE_CACHE: WRONG NUMBER OF BYTES");
......@@ -208,6 +229,10 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
self.write_cache.clear();
// Do nothing if there are no changes to write.
//
// This only happens if `self.write_cache` was empty,
// and the fs header wasn't changed by another operation.
if !self.header_changed {
return Ok(false);
}
@@ -208,6 +229,10 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(true)
}
pub fn read_block<T: Default + DerefMut<Target = [u8]>>(
pub fn read_block<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
) -> Result<BlockData<T>> {
@@ -233,7 +258,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
return Err(Error::new(ENOENT));
}
let mut data = T::default();
let mut data = match T::empty(ptr.addr().level()) {
Some(some) => some,
None => {
#[cfg(feature = "log")]
log::error!("READ_BLOCK: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
if let Some(raw) = self.write_cache.get(&ptr.addr()) {
data.copy_from_slice(raw);
} else {
let count = unsafe {
self.fs
.disk
.read_at(self.fs.block + ptr.addr(), &mut data)?
.read_at(self.fs.block + ptr.addr().index(), &mut data)?
};
if count != mem::size_of::<T>() {
if count != data.len() {
// Read wrong number of bytes
#[cfg(feature = "log")]
log::error!("READ_BLOCK: WRONG NUMBER OF BYTES");
@@ -244,16 +269,23 @@ impl<'a, D: Disk> Transaction<'a, D> {
// Incorrect hash
#[cfg(feature = "log")]
log::error!(
"READ_BLOCK: INCORRECT HASH {} != {} for block {}",
"READ_BLOCK: INCORRECT HASH 0x{:X} != 0x{:X} for block 0x{:X}",
block_ptr.hash(),
ptr.hash(),
ptr.addr()
ptr.addr().index()
);
return Err(Error::new(EIO));
}
@@ -268,10 +300,10 @@ impl<'a, D: Disk> Transaction<'a, D> {
///
/// # Safety
/// Unsafe because it creates strange BlockData types that must be swapped before use
unsafe fn read_block_or_default<T: Default + DerefMut<Target = [u8]>>(
unsafe fn read_block_or_empty<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
) -> Result<BlockData<T>> {
if ptr.is_null() {
Ok(BlockData::new(0, T::default()))
match T::empty(ptr.addr().level()) {
Some(empty) => Ok(BlockData::new(BlockAddr::default(), empty)),
None => {
#[cfg(feature = "log")]
log::error!("READ_BLOCK_OR_EMPTY: INVALID BLOCK LEVEL FOR TYPE");
Err(Error::new(ENOENT))
}
}
} else {
self.read_block(ptr)
}
}
unsafe fn read_record<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: BlockPtr<T>,
level: BlockLevel,
) -> Result<BlockData<T>> {
let record = unsafe { self.read_block_or_empty(ptr)? };
if record.addr().level() >= level {
// Return record if it is larger than or equal to requested level
return Ok(record);
}
// If a larger level was requested,
// create a fake record with the requested level
// and fill it with the data in the original record.
let (_old_addr, old_raw) = unsafe { record.into_parts() };
let mut raw = match T::empty(level) {
Some(empty) => empty,
None => {
#[cfg(feature = "log")]
log::error!("READ_RECORD: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
let len = min(raw.len(), old_raw.len());
raw[..len].copy_from_slice(&old_raw[..len]);
Ok(BlockData::new(BlockAddr::null(level), raw))
}
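A record written at a smaller level is therefore served in a zero-padded buffer of the requested level, under a null address so that a later sync allocates fresh space for it. The copy rule by itself (`upgrade_record` is a hypothetical reduction):

```rust
// Sketch of the record-upgrade copy above: old bytes move to the front
// of a zeroed buffer of the new, larger size.
fn upgrade_record(old: &[u8], new_len: usize) -> Vec<u8> {
    let mut new = vec![0u8; new_len];
    let len = old.len().min(new_len);
    new[..len].copy_from_slice(&old[..len]);
    new
}

#[test]
fn upgrade_pads_with_zeroes() {
    assert_eq!(upgrade_record(b"ab", 4), vec![b'a', b'b', 0, 0]);
}
```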
/// Write block data to a new address, returning new address
pub fn sync_block<T: Deref<Target = [u8]>>(
pub fn sync_block<T: BlockTrait + Deref<Target = [u8]>>(
&mut self,
mut block: BlockData<T>,
) -> Result<BlockPtr<T>> {
// Swap block to new address
let old_addr = block.swap_addr(unsafe { self.allocate()? });
let level = block.addr().level();
let old_addr = block.swap_addr(unsafe { self.allocate(level)? });
// Deallocate old address (will only take effect after sync_allocator, which helps to
// prevent re-use before a new header is written
if old_addr != 0 {
if !old_addr.is_null() {
unsafe {
self.deallocate(old_addr);
}
@@ -282,27 +314,63 @@ impl<'a, D: Disk> Transaction<'a, D> {
///
/// # Safety
/// Unsafe to encourage CoW semantics
pub(crate) unsafe fn write_block<T: Deref<Target = [u8]>>(
pub(crate) unsafe fn write_block<T: BlockTrait + Deref<Target = [u8]>>(
&mut self,
block: BlockData<T>,
) -> Result<BlockPtr<T>> {
if block.addr() == 0 {
if block.addr().is_null() {
// Pointer is invalid
#[cfg(feature = "log")]
log::error!("WRITE_BLOCK: POINTER IS NULL");
return Err(Error::new(ENOENT));
}
//TODO: transmute?
let mut raw = BlockRaw::default();
raw.copy_from_slice(block.data());
self.write_cache.insert(block.addr(), raw);
//TODO: do not convert to boxed slice if it already is one
self.write_cache.insert(
block.addr(),
block.data().deref().to_vec().into_boxed_slice(),
);
Ok(block.create_ptr())
}
pub fn read_tree<T: Default + DerefMut<Target = [u8]>>(
//
// MARK: tree operations
//
/// Walk the tree and return the contents and address
/// of the data block that `ptr` points to.
fn read_tree_and_addr<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<TreeData<T>> {
) -> Result<(TreeData<T>, BlockAddr)> {
if ptr.is_null() {
// ID is invalid (should this return None?)
#[cfg(feature = "log")]
@@ -315,29 +383,36 @@ impl<'a, D: Disk> Transaction<'a, D> {
let raw = self.read_block(l0.data().ptrs[i0])?;
//TODO: transmute instead of copy?
let mut data = T::default();
let mut data = match T::empty(BlockLevel::default()) {
Some(some) => some,
None => {
#[cfg(feature = "log")]
log::error!("READ_TREE: INVALID BLOCK LEVEL FOR TYPE");
return Err(Error::new(ENOENT));
}
};
data.copy_from_slice(raw.data());
Ok(TreeData::new(ptr.id(), data))
Ok((TreeData::new(ptr.id(), data), raw.addr()))
}
//TODO: improve performance, reduce writes
/// Walk the tree and return the contents of the data block that `ptr` points to.
pub fn read_tree<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<TreeData<T>> {
Ok(self.read_tree_and_addr(ptr)?.0)
}
/// Insert `block_ptr` into the first free slot in the tree,
/// returning a pointer to that slot.
pub fn insert_tree<T: Deref<Target = [u8]>>(
&mut self,
block_ptr: BlockPtr<T>,
) -> Result<TreePtr<T>> {
// TODO: improve performance, reduce writes
// Remember that if there is a free slot at any level, it will always sync when it
// allocates at the lowest level, so we could save a write by not writing each level
// as it is allocated.
unsafe {
let mut l3 = self.read_block(self.header.tree)?;
for i3 in 0..l3.data().ptrs.len() {
let mut l2 = self.read_block_or_default(l3.data().ptrs[i3])?;
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
for i2 in 0..l2.data().ptrs.len() {
let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
for i1 in 0..l1.data().ptrs.len() {
let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
for i0 in 0..l0.data().ptrs.len() {
let pn = l0.data().ptrs[i0];
......@@ -390,6 +483,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
continue;
}
// TODO: do we need to write all of these?
// Write updates to newly allocated blocks
l0.data_mut().ptrs[i0] = block_ptr.cast();
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
......@@ -408,6 +502,36 @@ impl<'a, D: Disk> Transaction<'a, D> {
Err(Error::new(ENOSPC))
}
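// For scale: with four nested levels of TREE_LIST_ENTRIES = 256 pointers,
// the walk above can address 256^4 = 2^32 slots (slot 0 is reserved as
// null). A one-line check of that arithmetic:

fn main() {
    const TREE_LIST_ENTRIES: u64 = 256;
    assert_eq!(TREE_LIST_ENTRIES.pow(4), 1u64 << 32);
}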
/// Clear the previously claimed slot in the tree for the given `ptr`. Note that this
/// should only be called after the corresponding node block has already been deallocated.
fn remove_tree<T: BlockTrait + DerefMut<Target = [u8]>>(
&mut self,
ptr: TreePtr<T>,
) -> Result<()> {
if ptr.is_null() {
// ID is invalid
#[cfg(feature = "log")]
log::error!("REMOVE_TREE: ID IS NULL");
return Err(Error::new(ENOENT));
}
let (i3, i2, i1, i0) = ptr.indexes();
let mut l3 = self.read_block(self.header.tree)?;
let mut l2 = self.read_block(l3.data().ptrs[i3])?;
let mut l1 = self.read_block(l2.data().ptrs[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
// Clear the value in the tree, but do not deallocate the block, as that should already
// have been done at the node level.
l0.data_mut().ptrs[i0] = BlockPtr::default();
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
self.header.tree = self.sync_block(l3)?;
self.header_changed = true;
Ok(())
}
pub fn sync_trees<T: Deref<Target = [u8]>>(&mut self, nodes: &[TreeData<T>]) -> Result<()> {
for node in nodes.iter().rev() {
let ptr = node.ptr();
......@@ -451,18 +575,27 @@ impl<'a, D: Disk> Transaction<'a, D> {
self.sync_trees(&[node])
}
//TODO: use more efficient methods for reading directories
//
// MARK: node operations
//
// TODO: use more efficient methods for reading directories
/// Write all children of `parent_ptr` to `children`.
/// `parent_ptr` must point to a directory node.
pub fn child_nodes(
&mut self,
parent_ptr: TreePtr<Node>,
children: &mut Vec<DirEntry>,
) -> Result<()> {
let parent = self.read_tree(parent_ptr)?;
for block_offset in 0..(parent.data().size() / BLOCK_SIZE) {
let block_ptr = self.node_block_ptr(&parent, block_offset)?;
let record_level = parent.data().record_level();
for record_offset in 0..(parent.data().size() / record_level.bytes()) {
let block_ptr = self.node_record_ptr(&parent, record_offset)?;
// TODO: is this safe? what if child_nodes is called on
// a node that isn't a directory?
let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() };
let dir = self.read_block(dir_ptr)?;
for entry in dir.data().entries {
for entry in dir.data().entries.iter() {
let node_ptr = entry.node_ptr();
// Skip empty entries
......@@ -470,7 +603,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
continue;
}
children.push(entry);
children.push(*entry);
}
}
......@@ -478,13 +611,16 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
//TODO: improve performance (h-tree?)
/// Find a node that is a child of the `parent_ptr` and is named `name`.
/// Returns ENOENT if this node is not found.
pub fn find_node(&mut self, parent_ptr: TreePtr<Node>, name: &str) -> Result<TreeData<Node>> {
let parent = self.read_tree(parent_ptr)?;
for block_offset in 0..(parent.data().size() / BLOCK_SIZE) {
let block_ptr = self.node_block_ptr(&parent, block_offset)?;
let record_level = parent.data().record_level();
for block_offset in 0..(parent.data().size() / record_level.bytes()) {
let block_ptr = self.node_record_ptr(&parent, block_offset)?;
let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() };
let dir = self.read_block(dir_ptr)?;
for entry in dir.data().entries {
for entry in dir.data().entries.iter() {
let node_ptr = entry.node_ptr();
// Skip empty entries
......@@ -505,7 +641,8 @@ impl<'a, D: Disk> Transaction<'a, D> {
Err(Error::new(ENOENT))
}
//TODO: improve performance (h-tree?)
// TODO: improve performance (h-tree?)
/// Create a new node in the tree with the given parameters.
pub fn create_node(
&mut self,
parent_ptr: TreePtr<Node>,
......@@ -514,18 +651,12 @@ impl<'a, D: Disk> Transaction<'a, D> {
ctime: u64,
ctime_nsec: u32,
) -> Result<TreeData<Node>> {
if name.contains(':') {
return Err(Error::new(EINVAL));
}
if self.find_node(parent_ptr, name).is_ok() {
return Err(Error::new(EEXIST));
}
self.check_name(&parent_ptr, name)?;
unsafe {
let parent = self.read_tree(parent_ptr)?;
let node_block_data = BlockData::new(
self.allocate()?,
self.allocate(BlockLevel::default())?,
Node::new(
mode,
parent.data().uid(),
......@@ -550,57 +681,57 @@ impl<'a, D: Disk> Transaction<'a, D> {
name: &str,
node_ptr: TreePtr<Node>,
) -> Result<()> {
if name.contains(':') {
return Err(Error::new(EINVAL));
}
if self.find_node(parent_ptr, name).is_ok() {
return Err(Error::new(EEXIST));
}
self.check_name(&parent_ptr, name)?;
let mut parent = self.read_tree(parent_ptr)?;
let mut node = self.read_tree(node_ptr)?;
// Increment node reference counter
let links = node.data().links();
node.data_mut().set_links(links + 1);
let entry = DirEntry::new(node_ptr, name).ok_or(Error::new(EINVAL))?;
let entry = DirEntry::new(node_ptr, name);
let block_end = parent.data().size() / BLOCK_SIZE;
for block_offset in 0..block_end {
let mut dir_block_ptr = self.node_block_ptr(&parent, block_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_block_ptr.cast() };
let record_level = parent.data().record_level();
let record_end = parent.data().size() / record_level.bytes();
for record_offset in 0..record_end {
let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() };
let mut dir = self.read_block(dir_ptr)?;
let mut dir_changed = false;
for old_entry in dir.data_mut().entries.iter_mut() {
// Skip filled entries
if !old_entry.node_ptr().is_null() {
continue;
}
// Write our new entry into the first
// free slot in this directory
*old_entry = entry;
dir_changed = true;
break;
}
if dir_changed {
dir_ptr = self.sync_block(dir)?;
dir_block_ptr = unsafe { dir_ptr.cast() };
self.sync_node_block_ptr(&mut parent, block_offset, dir_block_ptr)?;
// Write updated blocks
dir_ptr = self.sync_block(dir)?;
dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?;
self.sync_trees(&[parent, node])?;
return Ok(());
}
}
// Append a new dirlist, with first entry set to new entry
let mut dir = BlockData::new(unsafe { self.allocate()? }, DirList::default());
// We couldn't find a free direntry slot; this directory is full.
// We now need to add a new dirlist block to the parent node,
// with `entry` as its first member.
let mut dir =
BlockData::<DirList>::empty(unsafe { self.allocate(BlockLevel::default())? }).unwrap();
dir.data_mut().entries[0] = entry;
let dir_ptr = unsafe { self.write_block(dir)? };
let dir_block_ptr: BlockPtr<BlockRaw> = unsafe { dir_ptr.cast() };
let dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_block_ptr(&mut parent, block_end, dir_block_ptr)?;
parent.data_mut().set_size((block_end + 1) * BLOCK_SIZE);
self.sync_node_record_ptr(&mut parent, record_end, dir_record_ptr)?;
parent
.data_mut()
.set_size((record_end + 1) * record_level.bytes());
self.sync_trees(&[parent, node])?;
Ok(())
......@@ -608,10 +739,11 @@ impl<'a, D: Disk> Transaction<'a, D> {
pub fn remove_node(&mut self, parent_ptr: TreePtr<Node>, name: &str, mode: u16) -> Result<()> {
let mut parent = self.read_tree(parent_ptr)?;
let blocks = parent.data().size() / BLOCK_SIZE;
for block_offset in 0..blocks {
let mut dir_block_ptr = self.node_block_ptr(&parent, block_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_block_ptr.cast() };
let record_level = parent.data().record_level();
let records = parent.data().size() / record_level.bytes();
for record_offset in 0..records {
let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?;
let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() };
let mut dir = self.read_block(dir_ptr)?;
let mut node_opt = None;
for entry in dir.data_mut().entries.iter_mut() {
......@@ -626,15 +758,18 @@ impl<'a, D: Disk> Transaction<'a, D> {
if let Some(entry_name) = entry.name() {
if entry_name == name {
// Read node and test type against requested type
let node = self.read_tree(node_ptr)?;
let (node, addr) = self.read_tree_and_addr(node_ptr)?;
if node.data().mode() & Node::MODE_TYPE == mode {
if node.data().is_dir() && node.data().size() > 0 && node.data().links() == 1 {
if node.data().is_dir()
&& node.data().size() > 0
&& node.data().links() == 1
{
// Tried to remove directory that still has entries
return Err(Error::new(ENOTEMPTY));
}
// Save node and clear entry
node_opt = Some(node);
node_opt = Some((entry.node_ptr(), node, addr));
*entry = DirEntry::default();
break;
} else if node.data().is_dir() {
......@@ -648,28 +783,55 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
}
if let Some(mut node) = node_opt {
if let Some((node_tree_ptr, mut node, addr)) = node_opt {
let links = node.data().links();
if links > 1 {
let remove_node = if links > 1 {
node.data_mut().set_links(links - 1);
false
} else {
node.data_mut().set_links(0);
self.truncate_node_inner(&mut node, 0)?;
}
true
};
if block_offset == blocks - 1 && dir.data().is_empty() {
// Remove empty parent block, if it is at the end
self.remove_node_block_ptr(&mut parent, block_offset)?;
parent.data_mut().set_size(block_offset * BLOCK_SIZE);
if record_offset == records - 1 && dir.data().is_empty() {
let mut remove_record = record_offset;
loop {
// Remove empty parent record, if it is at the end
self.remove_node_record_ptr(&mut parent, remove_record)?;
parent
.data_mut()
.set_size(remove_record * record_level.bytes());
// Keep going for any other empty records
if remove_record > 0 {
remove_record -= 1;
dir_record_ptr = self.node_record_ptr(&parent, remove_record)?;
dir_ptr = unsafe { dir_record_ptr.cast() };
dir = self.read_block(dir_ptr)?;
if dir.data().is_empty() {
continue;
}
}
break;
}
} else {
// Save new parent block
// Save new parent record
dir_ptr = self.sync_block(dir)?;
dir_block_ptr = unsafe { dir_ptr.cast() };
self.sync_node_block_ptr(&mut parent, block_offset, dir_block_ptr)?;
dir_record_ptr = unsafe { dir_ptr.cast() };
self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?;
}
// Sync both parent and node at the same time
self.sync_trees(&[parent, node])?;
if remove_node {
self.sync_tree(parent)?;
self.remove_tree(node_tree_ptr)?;
unsafe {
self.deallocate(addr);
}
} else {
// Sync both parent and node at the same time
self.sync_trees(&[parent, node])?;
}
return Ok(());
}
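// The link-count rule above, in isolation: removing a name either drops one
// hard link or, at links == 1, removes the node itself. A hedged sketch
// (`on_unlink` is illustrative, not part of this crate):

fn on_unlink(links: u32) -> (u32, bool) {
    if links > 1 {
        (links - 1, false) // other hard links remain
    } else {
        (0, true) // last name: truncate and remove the node
    }
}

fn main() {
    assert_eq!(on_unlink(3), (2, false));
    assert_eq!(on_unlink(1), (0, true));
}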
......@@ -687,7 +849,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
) -> Result<()> {
let orig = self.find_node(orig_parent_ptr, orig_name)?;
//TODO: only allow ENOENT as an error?
// TODO: only allow ENOENT as an error?
if let Ok(new) = self.find_node(new_parent_ptr, new_name) {
// Move to same name, return
if new.id() == orig.id() {
......@@ -695,6 +857,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
// Remove new name
// (we renamed to a node that already exists, so overwrite it)
self.remove_node(
new_parent_ptr,
new_name,
......@@ -715,153 +878,176 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(())
}
fn node_block_ptr(
fn check_name(&mut self, parent_ptr: &TreePtr<Node>, name: &str) -> Result<()> {
if name.contains(':') {
return Err(Error::new(EINVAL));
}
if name.len() > DIR_ENTRY_MAX_LENGTH {
return Err(Error::new(EINVAL));
}
if self.find_node(parent_ptr.clone(), name).is_ok() {
return Err(Error::new(EEXIST));
}
Ok(())
}
/// Get a pointer to the record of `node` at the given offset
/// (i.e., to the `n`th record of `node`).
fn node_record_ptr(
&mut self,
node: &TreeData<Node>,
block_offset: u64,
) -> Result<BlockPtr<BlockRaw>> {
match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => Ok(node.data().level0[i0]),
NodeLevel::L1(i1, i0) => {
let l0 = self.read_block(node.data().level1[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L2(i2, i1, i0) => {
let l1 = self.read_block(node.data().level2[i2])?;
let l0 = self.read_block(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L3(i3, i2, i1, i0) => {
let l2 = self.read_block(node.data().level3[i3])?;
let l1 = self.read_block(l2.data().ptrs[i2])?;
let l0 = self.read_block(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let l3 = self.read_block(node.data().level4[i4])?;
let l2 = self.read_block(l3.data().ptrs[i3])?;
let l1 = self.read_block(l2.data().ptrs[i2])?;
let l0 = self.read_block(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
record_offset: u64,
) -> Result<BlockPtr<RecordRaw>> {
unsafe {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => Ok(node.data().level0[i0]),
NodeLevel::L1(i1, i0) => {
let l0 = self.read_block_or_empty(node.data().level1[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L2(i2, i1, i0) => {
let l1 = self.read_block_or_empty(node.data().level2[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L3(i3, i2, i1, i0) => {
let l2 = self.read_block_or_empty(node.data().level3[i3])?;
let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let l3 = self.read_block_or_empty(node.data().level4[i4])?;
let l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
Ok(l0.data().ptrs[i0])
}
}
}
}
fn remove_node_block_ptr(
fn remove_node_record_ptr(
&mut self,
node: &mut TreeData<Node>,
block_offset: u64,
record_offset: u64,
) -> Result<()> {
match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => {
self.deallocate_block(node.data_mut().level0[i0].clear());
}
NodeLevel::L1(i1, i0) => {
let mut l0 = self.read_block(node.data().level1[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(node.data_mut().level1[i1].clear());
} else {
node.data_mut().level1[i1] = self.sync_block(l0)?;
}
}
NodeLevel::L2(i2, i1, i0) => {
let mut l1 = self.read_block(node.data().level2[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(node.data_mut().level2[i2].clear());
} else {
node.data_mut().level2[i2] = self.sync_block(l1)?;
}
}
NodeLevel::L3(i3, i2, i1, i0) => {
let mut l2 = self.read_block(node.data().level3[i3])?;
let mut l1 = self.read_block(l2.data().ptrs[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
}
if l2.data().is_empty() {
self.deallocate_block(node.data_mut().level3[i3].clear());
} else {
node.data_mut().level3[i3] = self.sync_block(l2)?;
unsafe {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => {
self.deallocate_block(node.data_mut().level0[i0].clear());
}
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let mut l3 = self.read_block(node.data().level4[i4])?;
let mut l2 = self.read_block(l3.data().ptrs[i3])?;
let mut l1 = self.read_block(l2.data().ptrs[i2])?;
let mut l0 = self.read_block(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
NodeLevel::L1(i1, i0) => {
let mut l0 = self.read_block_or_empty(node.data().level1[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(node.data_mut().level1[i1].clear());
} else {
node.data_mut().level1[i1] = self.sync_block(l0)?;
}
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
NodeLevel::L2(i2, i1, i0) => {
let mut l1 = self.read_block_or_empty(node.data().level2[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(node.data_mut().level2[i2].clear());
} else {
node.data_mut().level2[i2] = self.sync_block(l1)?;
}
}
if l2.data().is_empty() {
self.deallocate_block(l3.data_mut().ptrs[i3].clear());
} else {
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
NodeLevel::L3(i3, i2, i1, i0) => {
let mut l2 = self.read_block_or_empty(node.data().level3[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
}
if l2.data().is_empty() {
self.deallocate_block(node.data_mut().level3[i3].clear());
} else {
node.data_mut().level3[i3] = self.sync_block(l2)?;
}
}
if l3.data().is_empty() {
self.deallocate_block(node.data_mut().level4[i4].clear());
} else {
node.data_mut().level4[i4] = self.sync_block(l3)?;
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let mut l3 = self.read_block_or_empty(node.data().level4[i4])?;
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
self.deallocate_block(l0.data_mut().ptrs[i0].clear());
if l0.data().is_empty() {
self.deallocate_block(l1.data_mut().ptrs[i1].clear());
} else {
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
}
if l1.data().is_empty() {
self.deallocate_block(l2.data_mut().ptrs[i2].clear());
} else {
l2.data_mut().ptrs[i2] = self.sync_block(l1)?;
}
if l2.data().is_empty() {
self.deallocate_block(l3.data_mut().ptrs[i3].clear());
} else {
l3.data_mut().ptrs[i3] = self.sync_block(l2)?;
}
if l3.data().is_empty() {
self.deallocate_block(node.data_mut().level4[i4].clear());
} else {
node.data_mut().level4[i4] = self.sync_block(l3)?;
}
}
}
}
Ok(())
Ok(())
}
}
fn sync_node_block_ptr(
/// Store `ptr` as the record at `record_offset` of `node`.
fn sync_node_record_ptr(
&mut self,
node: &mut TreeData<Node>,
block_offset: u64,
ptr: BlockPtr<BlockRaw>,
record_offset: u64,
ptr: BlockPtr<RecordRaw>,
) -> Result<()> {
unsafe {
match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? {
match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? {
NodeLevel::L0(i0) => {
node.data_mut().level0[i0] = ptr;
}
NodeLevel::L1(i1, i0) => {
let mut l0 = self.read_block_or_default(node.data().level1[i1])?;
let mut l0 = self.read_block_or_empty(node.data().level1[i1])?;
l0.data_mut().ptrs[i0] = ptr;
node.data_mut().level1[i1] = self.sync_block(l0)?;
}
NodeLevel::L2(i2, i1, i0) => {
let mut l1 = self.read_block_or_default(node.data().level2[i2])?;
let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?;
let mut l1 = self.read_block_or_empty(node.data().level2[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
node.data_mut().level2[i2] = self.sync_block(l1)?;
}
NodeLevel::L3(i3, i2, i1, i0) => {
let mut l2 = self.read_block_or_default(node.data().level3[i3])?;
let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?;
let mut l2 = self.read_block_or_empty(node.data().level3[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
......@@ -869,10 +1055,10 @@ impl<'a, D: Disk> Transaction<'a, D> {
node.data_mut().level3[i3] = self.sync_block(l2)?;
}
NodeLevel::L4(i4, i3, i2, i1, i0) => {
let mut l3 = self.read_block_or_default(node.data().level4[i4])?;
let mut l2 = self.read_block_or_default(l3.data().ptrs[i3])?;
let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?;
let mut l3 = self.read_block_or_empty(node.data().level4[i4])?;
let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?;
let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?;
let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?;
l0.data_mut().ptrs[i0] = ptr;
l1.data_mut().ptrs[i1] = self.sync_block(l0)?;
......@@ -893,22 +1079,36 @@ impl<'a, D: Disk> Transaction<'a, D> {
buf: &mut [u8],
) -> Result<usize> {
let node_size = node.data().size();
let mut i = 0;
while i < buf.len() && offset < node_size {
let block_ptr = self.node_block_ptr(&node, offset / BLOCK_SIZE)?;
let block = self.read_block(block_ptr)?;
let record_level = node.data().record_level();
let j = (offset % BLOCK_SIZE) as usize;
let mut bytes_read = 0;
while bytes_read < buf.len() && offset < node_size {
// How many bytes we've read into the next record
let j = (offset % record_level.bytes()) as usize;
// Number of bytes to read in this iteration
let len = min(
buf.len() - i,
min(BLOCK_SIZE - j as u64, node_size - offset) as usize,
buf.len() - bytes_read, // number of bytes we have left in `buf`
min(
record_level.bytes() - j as u64, // number of bytes we haven't read in this record
node_size - offset, // number of bytes left in this node
) as usize,
);
buf[i..i + len].copy_from_slice(&block.data()[j..j + len]);
i += len;
let record_idx = offset / record_level.bytes();
let record_ptr = self.node_record_ptr(node, record_idx)?;
// The level of the record to read.
// This is at most `record_level` due to the way `len` is computed.
let level = BlockLevel::for_bytes((j + len) as u64);
let record = unsafe { self.read_record(record_ptr, level)? };
buf[bytes_read..bytes_read + len].copy_from_slice(&record.data()[j..j + len]);
bytes_read += len;
offset += len as u64;
}
Ok(i)
Ok(bytes_read)
}
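// Worked example of the per-iteration arithmetic above, assuming 4 KiB
// (level 0) records; the sizes here are hypothetical:

fn main() {
    let record_bytes: u64 = 4096;
    let node_size: u64 = 10_000;
    let buf_len: usize = 1000;
    let mut offset: u64 = 4000; // start 96 bytes before a record boundary

    let mut bytes_read = 0;
    while bytes_read < buf_len && offset < node_size {
        let j = (offset % record_bytes) as usize;
        let len = (buf_len - bytes_read)
            .min((record_bytes - j as u64).min(node_size - offset) as usize);
        // First pass: record 0, j = 4000, len = 96 (finish the record).
        // Second pass: record 1, j = 0, len = 904 (rest of the buffer).
        bytes_read += len;
        offset += len as u64;
    }
    assert_eq!(bytes_read, 1000);
}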
pub fn read_node(
......@@ -943,6 +1143,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
pub fn truncate_node_inner(&mut self, node: &mut TreeData<Node>, size: u64) -> Result<bool> {
let old_size = node.data().size();
let record_level = node.data().record_level();
// Size already matches, return
if old_size == size {
......@@ -950,24 +1151,34 @@ impl<'a, D: Disk> Transaction<'a, D> {
}
if old_size < size {
// If size is smaller, write zeroes until the size matches
let zeroes = [0; BLOCK_SIZE as usize];
// If we're "truncating" to a larger size,
// write zeroes until the size matches
let zeroes = RecordRaw::empty(record_level).unwrap();
let mut offset = old_size;
while offset < size {
let start = offset % BLOCK_SIZE;
let end = if offset / BLOCK_SIZE == size / BLOCK_SIZE {
size % BLOCK_SIZE
let start = offset % record_level.bytes();
if start == 0 {
// We don't have to write completely zero records as read will interpret
// null record pointers as zero records
offset = size;
break;
}
let end = if offset / record_level.bytes() == size / record_level.bytes() {
size % record_level.bytes()
} else {
BLOCK_SIZE
record_level.bytes()
};
self.write_node_inner(node, &mut offset, &zeroes[start as usize..end as usize])?;
}
assert_eq!(offset, size);
} else {
// Deallocate blocks
for block in ((size + BLOCK_SIZE - 1) / BLOCK_SIZE..old_size / BLOCK_SIZE).rev() {
self.remove_node_block_ptr(node, block)?;
// Deallocate records
for record in ((size + record_level.bytes() - 1) / record_level.bytes()
..old_size / record_level.bytes())
.rev()
{
self.remove_node_record_ptr(node, record)?;
}
}
......@@ -977,6 +1188,10 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(true)
}
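// Consequence of the fast path above: growing a file only zero-pads the tail
// of the last allocated record; everything past the next record boundary
// stays sparse. A standalone sketch of that plan, assuming 4 KiB records
// (`sparse_bytes` is illustrative only):

fn sparse_bytes(old_size: u64, new_size: u64, record_bytes: u64) -> u64 {
    let mut offset = old_size;
    while offset < new_size {
        if offset % record_bytes == 0 {
            // Null record pointers read back as zeroes, so the
            // remaining records never need to be written.
            return new_size - offset;
        }
        // Zero-pad only to the end of the current record.
        offset = (((offset / record_bytes) + 1) * record_bytes).min(new_size);
    }
    0
}

fn main() {
    // Growing 5000 bytes to 1 MiB writes ~3 KiB of zeroes (up to the
    // 8192-byte boundary); the remaining bytes stay sparse.
    assert_eq!(sparse_bytes(5000, 1 << 20, 4096), (1 << 20) - 8192);
}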
/// Truncate the given node to the given size.
///
/// If `size` is larger than the node's current size,
/// expand the node with zeroes.
pub fn truncate_node(
&mut self,
node_ptr: TreePtr<Node>,
......@@ -1005,32 +1220,41 @@ impl<'a, D: Disk> Transaction<'a, D> {
) -> Result<bool> {
let mut node_changed = false;
let node_blocks = (node.data().size() + BLOCK_SIZE - 1) / BLOCK_SIZE;
let record_level = node.data().record_level();
let node_records = (node.data().size() + record_level.bytes() - 1) / record_level.bytes();
let mut i = 0;
while i < buf.len() {
let mut block_ptr = if node_blocks > (*offset / BLOCK_SIZE) {
self.node_block_ptr(node, *offset / BLOCK_SIZE)?
let j = (*offset % record_level.bytes()) as usize;
let len = min(buf.len() - i, record_level.bytes() as usize - j);
let level = BlockLevel::for_bytes((j + len) as u64);
let mut record_ptr = if node_records > (*offset / record_level.bytes()) {
self.node_record_ptr(node, *offset / record_level.bytes())?
} else {
BlockPtr::default()
BlockPtr::null(level)
};
let mut block = unsafe { self.read_block_or_default(block_ptr)? };
let mut record = unsafe { self.read_record(record_ptr, level)? };
let j = (*offset % BLOCK_SIZE) as usize;
let len = min(buf.len() - i, BLOCK_SIZE as usize - j);
if block_ptr.is_null() || buf[i..i + len] != block.data()[j..j + len] {
if buf[i..i + len] != record.data()[j..j + len] {
unsafe {
let old_addr = block.swap_addr(self.allocate()?);
// CoW record using its current level
let mut old_addr = record.swap_addr(self.allocate(record.addr().level())?);
// If the record was resized we need to dealloc the original ptr
if old_addr.is_null() {
old_addr = record_ptr.addr();
}
block.data_mut()[j..j + len].copy_from_slice(&buf[i..i + len]);
block_ptr = self.write_block(block)?;
record.data_mut()[j..j + len].copy_from_slice(&buf[i..i + len]);
record_ptr = self.write_block(record)?;
if old_addr != 0 {
if !old_addr.is_null() {
self.deallocate(old_addr);
}
}
self.sync_node_block_ptr(node, *offset / BLOCK_SIZE, block_ptr)?;
self.sync_node_record_ptr(node, *offset / record_level.bytes(), record_ptr)?;
node_changed = true;
}
......@@ -1046,6 +1270,7 @@ impl<'a, D: Disk> Transaction<'a, D> {
Ok(node_changed)
}
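// The write path above is copy-on-write: a modified record is rewritten at a
// freshly allocated address, and the old address is retired only after the
// tree syncs. A minimal standalone sketch of that pattern (the types are
// illustrative, not this crate's):

#[derive(Clone)]
struct Rec {
    addr: u64,
    data: Vec<u8>,
}

fn cow_write(rec: &Rec, at: usize, bytes: &[u8], fresh_addr: u64) -> (Rec, u64) {
    let mut new = rec.clone();
    new.addr = fresh_addr; // swap in the newly allocated address
    new.data[at..at + bytes.len()].copy_from_slice(bytes);
    (new, rec.addr) // old address is deallocated after the sync
}

fn main() {
    let rec = Rec { addr: 7, data: vec![0; 16] };
    let (new, freed) = cow_write(&rec, 4, b"abcd", 8);
    assert_eq!((new.addr, freed), (8, 7));
    assert_eq!(&new.data[4..8], b"abcd");
}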
/// Write the bytes from `buf` to `node`, starting at `offset`.
pub fn write_node(
&mut self,
node_ptr: TreePtr<Node>,
......
use core::{marker::PhantomData, mem, ops, slice};
use simple_endian::*;
use endian_num::Le;
use crate::{BlockPtr, BlockRaw};
use crate::{BlockLevel, BlockPtr, BlockRaw, BlockTrait};
// 1 << 8 = 256: the number of entries in a TreeList
const TREE_LIST_SHIFT: u32 = 8;
const TREE_LIST_ENTRIES: usize = 1 << TREE_LIST_SHIFT;
// Tree with 4 levels
/// A tree with 4 levels
pub type Tree = TreeList<TreeList<TreeList<TreeList<BlockRaw>>>>;
/// A [`TreePtr`] and the contents of the block it references.
#[derive(Clone, Copy, Debug, Default)]
pub struct TreeData<T> {
/// The value of the [`TreePtr`]
id: u32,
/// The data
data: T,
}
......@@ -44,15 +49,21 @@ impl<T> TreeData<T> {
}
}
#[repr(packed)]
/// A list of pointers to blocks of type `T`.
/// This is one level of a [`Tree`], defined above.
#[repr(C, packed)]
pub struct TreeList<T> {
pub ptrs: [BlockPtr<T>; 1 << TREE_LIST_SHIFT],
pub ptrs: [BlockPtr<T>; TREE_LIST_ENTRIES],
}
impl<T> Default for TreeList<T> {
fn default() -> Self {
Self {
ptrs: [BlockPtr::default(); 1 << TREE_LIST_SHIFT],
unsafe impl<T> BlockTrait for TreeList<T> {
fn empty(level: BlockLevel) -> Option<Self> {
if level.0 == 0 {
Some(Self {
ptrs: [BlockPtr::default(); TREE_LIST_ENTRIES],
})
} else {
None
}
}
}
......@@ -80,13 +91,16 @@ impl<T> ops::DerefMut for TreeList<T> {
}
}
#[repr(packed)]
/// A pointer to an entry in a [`Tree`].
#[repr(C, packed)]
pub struct TreePtr<T> {
id: u32le,
id: Le<u32>,
phantom: PhantomData<T>,
}
impl<T> TreePtr<T> {
/// Get a [`TreePtr`] to the filesystem root
/// directory's node.
pub fn root() -> Self {
Self::new(1)
}
......@@ -98,6 +112,11 @@ impl<T> TreePtr<T> {
}
}
/// Create a [`TreePtr`] from [`Tree`] indices,
/// where `indexes` is `(i3, i2, i1, i0)`:
/// - `i3` is the index into the level 3 table,
/// - `i2` is the index into the level 2 table at `i3`,
/// - ...and so on.
pub fn from_indexes(indexes: (usize, usize, usize, usize)) -> Self {
const SHIFT: u32 = TREE_LIST_SHIFT;
let id = ((indexes.0 << (3 * SHIFT)) as u32)
......@@ -111,33 +130,36 @@ impl<T> TreePtr<T> {
}
pub fn id(&self) -> u32 {
{ self.id }.to_native()
self.id.to_ne()
}
pub fn is_null(&self) -> bool {
self.id() == 0
}
/// Get the indices of this [`TreePtr`] in a [`Tree`].
/// Returns `(i3, i2, i1, i0)`:
/// - `i3` is the index into the level 3 table,
/// - `i2` is the index into the level 2 table at `i3`
/// - ...and so on.
pub fn indexes(&self) -> (usize, usize, usize, usize) {
const SHIFT: u32 = TREE_LIST_SHIFT;
const NUM: u32 = 1 << SHIFT;
const MASK: u32 = NUM - 1;
let id = self.id();
(
((id >> (3 * SHIFT)) & MASK) as usize,
((id >> (2 * SHIFT)) & MASK) as usize,
((id >> SHIFT) & MASK) as usize,
(id & MASK) as usize,
)
let i3 = ((id >> (3 * SHIFT)) & MASK) as usize;
let i2 = ((id >> (2 * SHIFT)) & MASK) as usize;
let i1 = ((id >> SHIFT) & MASK) as usize;
let i0 = (id & MASK) as usize;
(i3, i2, i1, i0)
}
}
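// Round-trip of the id packing used by `from_indexes` and `indexes` above,
// re-implemented standalone (SHIFT = 8, matching TREE_LIST_SHIFT):

fn main() {
    const SHIFT: u32 = 8;
    const MASK: u32 = (1 << SHIFT) - 1;
    let (i3, i2, i1, i0) = (1u32, 2, 3, 4);
    let id = (i3 << (3 * SHIFT)) | (i2 << (2 * SHIFT)) | (i1 << SHIFT) | i0;
    assert_eq!(id, 0x0102_0304);
    let back = (
        (id >> (3 * SHIFT)) & MASK,
        (id >> (2 * SHIFT)) & MASK,
        (id >> SHIFT) & MASK,
        id & MASK,
    );
    assert_eq!(back, (i3, i2, i1, i0));
}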
impl<T> Clone for TreePtr<T> {
fn clone(&self) -> Self {
Self {
id: self.id,
phantom: PhantomData,
}
*self
}
}
......
use failure::Error;
use std::{
fs,
io::{self},
......@@ -33,7 +31,7 @@ fn unmount_linux_path(mount_path: &str) -> io::Result<ExitStatus> {
))
}
pub fn unmount_path(mount_path: &str) -> Result<(), Error> {
pub fn unmount_path(mount_path: &str) -> Result<(), io::Error> {
if cfg!(target_os = "redox") {
fs::remove_file(format!(":{}", mount_path))?
} else {
......@@ -45,7 +43,10 @@ pub fn unmount_path(mount_path: &str) -> Result<(), Error> {
let status = status_res?;
if !status.success() {
return Err(io::Error::new(io::ErrorKind::Other, "redoxfs umount failed").into());
return Err(io::Error::new(
io::ErrorKind::Other,
"redoxfs umount failed",
));
}
}
......
......@@ -34,15 +34,22 @@ ls -lah image
mkdir image/test
time cp -r src image/test/src
dd if=/dev/urandom of=image/test/random bs=1M count=256
dd if=image/test/random of=/dev/null bs=1M count=256
time truncate --size=256M image/test/sparse
dd if=image/test/sparse of=/dev/null bs=1M count=256
dd if=/dev/zero of=image/test/zero bs=1M count=256
dd if=image/test/zero of=/dev/null bs=1M count=256
ls -lah image/test
df -h image
rm image/test/random
rm image/test/sparse
rm image/test/zero
rm -rf image/test/src
rmdir image/test
......