diff --git a/Cargo.lock b/Cargo.lock index cae109214aa390d520cee6ac81fe81325433078d..d8853e3192e204ce14f8b6c17e1ca38c44247a35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,7 +298,7 @@ checksum = "20145670ba436b55d91fc92d25e71160fbfbdd57831631c8d7d36377a476f1cb" [[package]] name = "redoxfs" -version = "0.5.13" +version = "0.6.0" dependencies = [ "aes", "argon2", diff --git a/Cargo.toml b/Cargo.toml index 5541f5aae9162ed2fe0c95044f531f0be9a231d4..bca0e1d1e416dd0430e8da3129a40fa88f2b0642 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "redoxfs" description = "The Redox Filesystem" repository = "https://gitlab.redox-os.org/redox-os/redoxfs" -version = "0.5.13" +version = "0.6.0" license-file = "LICENSE" readme = "README.md" authors = ["Jeremy Soller <jackpot51@gmail.com>"] diff --git a/src/allocator.rs b/src/allocator.rs index ff9d55a6cb211dadc37bb8d31f3625d8a13dbcf6..4e450ad5e4b279b4f4f4bb74af413f38c497474c 100644 --- a/src/allocator.rs +++ b/src/allocator.rs @@ -2,7 +2,7 @@ use alloc::vec::Vec; use core::{fmt, mem, ops, slice}; use simple_endian::*; -use crate::{BlockPtr, BLOCK_SIZE}; +use crate::{BlockAddr, BlockLevel, BlockPtr, BlockTrait, BLOCK_SIZE}; pub const ALLOC_LIST_ENTRIES: usize = (BLOCK_SIZE as usize - mem::size_of::<BlockPtr<AllocList>>()) / mem::size_of::<AllocEntry>(); @@ -26,63 +26,68 @@ impl Allocator { free } - pub fn allocate(&mut self, addr_level: usize) -> Option<u64> { + pub fn allocate(&mut self, block_level: BlockLevel) -> Option<BlockAddr> { // First, find the lowest level with a free block - let mut addr_opt = None; - let mut level = addr_level; + let mut index_opt = None; + let mut level = block_level.0; while level < self.levels.len() { if !self.levels[level].is_empty() { - addr_opt = self.levels[level].pop(); + index_opt = self.levels[level].pop(); break; } level += 1; } // Next, if a free block was found, split it up until you have a usable block of the right level - let addr = addr_opt?; - while level > addr_level { + let index = index_opt?; + while level > block_level.0 { level -= 1; let level_size = 1 << level; - self.levels[level].push(addr + level_size); + self.levels[level].push(index + level_size); } - Some(addr) + Some(unsafe { BlockAddr::new(index, block_level) }) } - pub fn allocate_exact(&mut self, exact_addr: u64) -> Option<u64> { - let mut addr_opt = None; + pub fn allocate_exact(&mut self, exact_addr: BlockAddr) -> Option<BlockAddr> { + // This function only supports level 0 right now + assert_eq!(exact_addr.level().0, 0); + let exact_index = exact_addr.index(); + + let mut index_opt = None; // Go from the highest to the lowest level for level in (0..self.levels.len()).rev() { let level_size = 1 << level; // Split higher block if found - if let Some(addr) = addr_opt.take() { - self.levels[level].push(addr); - self.levels[level].push(addr + level_size); + if let Some(index) = index_opt.take() { + self.levels[level].push(index); + self.levels[level].push(index + level_size); } // Look for matching block and remove it for i in 0..self.levels[level].len() { let start = self.levels[level][i]; - if start <= exact_addr { + if start <= exact_index { let end = start + level_size; - if end > exact_addr { + if end > exact_index { self.levels[level].remove(i); - addr_opt = Some(start); + index_opt = Some(start); break; } } } } - addr_opt + Some(unsafe { BlockAddr::new(index_opt?, exact_addr.level()) }) } - pub fn deallocate(&mut self, mut addr: u64, addr_level: usize) { + pub fn deallocate(&mut self, addr: BlockAddr) { // See if block 
matches with a sibling - if so, join them into a larger block, and populate // this all the way to the top level - let mut level = addr_level; + let mut index = addr.index(); + let mut level = addr.level().0; loop { while level >= self.levels.len() { self.levels.push(Vec::new()); @@ -94,14 +99,14 @@ impl Allocator { let mut found = false; let mut i = 0; while i < self.levels[level].len() { - let level_addr = self.levels[level][i]; - if addr % next_size == 0 && addr + level_size == level_addr { + let level_index = self.levels[level][i]; + if index % next_size == 0 && index + level_size == level_index { self.levels[level].remove(i); found = true; break; - } else if level_addr % next_size == 0 && level_addr + level_size == addr { + } else if level_index % next_size == 0 && level_index + level_size == index { self.levels[level].remove(i); - addr = level_addr; + index = level_index; found = true; break; } @@ -109,7 +114,7 @@ impl Allocator { } if !found { - self.levels[level].push(addr); + self.levels[level].push(index); return; } @@ -120,20 +125,28 @@ impl Allocator { #[repr(packed)] pub struct AllocEntry { - addr: u64le, + index: u64le, count: i64le, } impl AllocEntry { - pub fn new(addr: u64, count: i64) -> Self { + pub fn new(index: u64, count: i64) -> Self { Self { - addr: addr.into(), + index: index.into(), count: count.into(), } } - pub fn addr(&self) -> u64 { - { self.addr }.to_native() + pub fn allocate(addr: BlockAddr) -> Self { + Self::new(addr.index(), -addr.level().blocks()) + } + + pub fn deallocate(addr: BlockAddr) -> Self { + Self::new(addr.index(), addr.level().blocks()) + } + + pub fn index(&self) -> u64 { + { self.index }.to_native() } pub fn count(&self) -> i64 { @@ -156,7 +169,7 @@ impl Copy for AllocEntry {} impl Default for AllocEntry { fn default() -> Self { Self { - addr: 0.into(), + index: 0.into(), count: 0.into(), } } @@ -164,10 +177,10 @@ impl Default for AllocEntry { impl fmt::Debug for AllocEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let addr = self.addr(); + let index = self.index(); let count = self.count(); f.debug_struct("AllocEntry") - .field("addr", &addr) + .field("index", &index) .field("count", &count) .finish() } @@ -180,11 +193,15 @@ pub struct AllocList { pub entries: [AllocEntry; ALLOC_LIST_ENTRIES], } -impl Default for AllocList { - fn default() -> Self { - Self { - prev: BlockPtr::default(), - entries: [AllocEntry::default(); ALLOC_LIST_ENTRIES], +unsafe impl BlockTrait for AllocList { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 == 0 { + Some(Self { + prev: BlockPtr::default(), + entries: [AllocEntry::default(); ALLOC_LIST_ENTRIES], + }) + } else { + None } } } @@ -236,14 +253,17 @@ fn alloc_node_size_test() { fn allocator_test() { let mut alloc = Allocator::default(); - assert_eq!(alloc.allocate(0), None); + assert_eq!(alloc.allocate(BlockLevel::default()), None); - alloc.deallocate(1, 0); - assert_eq!(alloc.allocate(0), Some(1)); - assert_eq!(alloc.allocate(0), None); + alloc.deallocate(unsafe { BlockAddr::new(1, BlockLevel::default()) }); + assert_eq!( + alloc.allocate(BlockLevel::default()), + Some(unsafe { BlockAddr::new(1, BlockLevel::default()) }) + ); + assert_eq!(alloc.allocate(BlockLevel::default()), None); for addr in 1023..2048 { - alloc.deallocate(addr, 0); + alloc.deallocate(unsafe { BlockAddr::new(addr, BlockLevel::default()) }); } assert_eq!(alloc.levels.len(), 11); @@ -258,9 +278,12 @@ fn allocator_test() { } for addr in 1023..2048 { - assert_eq!(alloc.allocate(0), Some(addr)); + assert_eq!( + 
alloc.allocate(BlockLevel::default()), + Some(unsafe { BlockAddr::new(addr, BlockLevel::default()) }) + ); } - assert_eq!(alloc.allocate(0), None); + assert_eq!(alloc.allocate(BlockLevel::default()), None); assert_eq!(alloc.levels.len(), 11); for level in 0..alloc.levels.len() { diff --git a/src/bin/mount.rs b/src/bin/mount.rs index d157c14ce833db782cf9351d6357b517989b2e18..d6e2f0234087cca30383e4ecc7b574a39ac8fffe 100644 --- a/src/bin/mount.rs +++ b/src/bin/mount.rs @@ -159,33 +159,30 @@ fn filesystem_by_path( }; match DiskFile::open(path).map(DiskCache::new) { - Ok(disk) => match redoxfs::FileSystem::open( - disk, - password_opt.as_deref(), - block_opt, - true, - ) { - Ok(filesystem) => { - println!( - "redoxfs: opened filesystem on {} with uuid {}", - path, - Uuid::from_bytes(filesystem.header.uuid()).hyphenated() - ); - - return Some((path.to_string(), filesystem)); - } - Err(err) => match err.errno { - syscall::ENOKEY => { - if password_opt.is_some() { - println!("redoxfs: incorrect password ({}/{})", attempt, attempts); - } - } - _ => { - println!("redoxfs: failed to open filesystem {}: {}", path, err); - break; + Ok(disk) => { + match redoxfs::FileSystem::open(disk, password_opt.as_deref(), block_opt, true) { + Ok(filesystem) => { + println!( + "redoxfs: opened filesystem on {} with uuid {}", + path, + Uuid::from_bytes(filesystem.header.uuid()).hyphenated() + ); + + return Some((path.to_string(), filesystem)); } - }, - }, + Err(err) => match err.errno { + syscall::ENOKEY => { + if password_opt.is_some() { + println!("redoxfs: incorrect password ({}/{})", attempt, attempts); + } + } + _ => { + println!("redoxfs: failed to open filesystem {}: {}", path, err); + break; + } + }, + } + } Err(err) => { println!("redoxfs: failed to open image {}: {}", path, err); break; diff --git a/src/block.rs b/src/block.rs index 70dabb9c7a2d0faad6c84d258d1d9631c65219a2..59ec9fdf55d353cc4c4f29898e2a6ce1d9c2c0b2 100644 --- a/src/block.rs +++ b/src/block.rs @@ -5,23 +5,91 @@ use crate::BLOCK_SIZE; const BLOCK_LIST_ENTRIES: usize = BLOCK_SIZE as usize / mem::size_of::<BlockPtr<BlockRaw>>(); +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BlockAddr(u64); + +impl BlockAddr { + // Unsafe because this can create invalid blocks + pub(crate) unsafe fn new(index: u64, level: BlockLevel) -> Self { + // Level must only use the lowest four bits + if level.0 > 0xF { + panic!("block level used more than four bits"); + } + + // Index must not use the highest four bits + let inner = index + .checked_shl(4) + .expect("block index used highest four bits") + | (level.0 as u64); + Self(inner) + } + + pub fn null(level: BlockLevel) -> Self { + unsafe { Self::new(0, level) } + } + + pub fn index(&self) -> u64 { + // The first four bits store the level + self.0 >> 4 + } + + pub fn level(&self) -> BlockLevel { + // The first four bits store the level + BlockLevel((self.0 & 0xF) as usize) + } + + pub fn is_null(&self) -> bool { + self.index() == 0 + } +} + +#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BlockLevel(pub(crate) usize); + +impl BlockLevel { + pub(crate) fn for_bytes(bytes: u64) -> Self { + //TODO: optimize + let mut level = BlockLevel(0); + while level.bytes() < bytes { + level.0 += 1; + } + level + } + + pub fn blocks(self) -> i64 { + 1 << self.0 + } + + pub fn bytes(self) -> u64 { + BLOCK_SIZE << self.0 + } +} + +pub unsafe trait BlockTrait { + fn empty(level: BlockLevel) -> Option<Self> + where + Self: Sized; +} + #[derive(Clone, 
Copy, Debug, Default)] pub struct BlockData<T> { - addr: u64, + addr: BlockAddr, data: T, } impl<T> BlockData<T> { - pub fn new(addr: u64, data: T) -> Self { + pub fn new(addr: BlockAddr, data: T) -> Self { Self { addr, data } } - pub fn addr(&self) -> u64 { + pub fn addr(&self) -> BlockAddr { self.addr } #[must_use = "don't forget to de-allocate old block address"] - pub fn swap_addr(&mut self, addr: u64) -> u64 { + pub fn swap_addr(&mut self, addr: BlockAddr) -> BlockAddr { + // Address levels must match + assert_eq!(self.addr.level(), addr.level()); let old = self.addr; self.addr = addr; old @@ -35,15 +103,22 @@ impl<T> BlockData<T> { &mut self.data } - pub fn into_data(self) -> T { - self.data + pub(crate) unsafe fn into_parts(self) -> (BlockAddr, T) { + (self.addr, self.data) + } +} + +impl<T: BlockTrait> BlockData<T> { + pub fn empty(addr: BlockAddr) -> Option<Self> { + let empty = T::empty(addr.level())?; + Some(Self::new(addr, empty)) } } impl<T: ops::Deref<Target = [u8]>> BlockData<T> { pub fn create_ptr(&self) -> BlockPtr<T> { BlockPtr { - addr: self.addr.into(), + addr: self.addr.0.into(), hash: seahash::hash(self.data.deref()).into(), phantom: PhantomData, } @@ -55,6 +130,18 @@ pub struct BlockList<T> { pub ptrs: [BlockPtr<T>; BLOCK_LIST_ENTRIES], } +unsafe impl<T> BlockTrait for BlockList<T> { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 == 0 { + Some(Self { + ptrs: [BlockPtr::default(); BLOCK_LIST_ENTRIES], + }) + } else { + None + } + } +} + impl<T> BlockList<T> { pub fn is_empty(&self) -> bool { for ptr in self.ptrs.iter() { @@ -66,14 +153,6 @@ impl<T> BlockList<T> { } } -impl<T> Default for BlockList<T> { - fn default() -> Self { - Self { - ptrs: [BlockPtr::default(); BLOCK_LIST_ENTRIES], - } - } -} - impl<T> ops::Deref for BlockList<T> { type Target = [u8]; fn deref(&self) -> &[u8] { @@ -105,8 +184,16 @@ pub struct BlockPtr<T> { } impl<T> BlockPtr<T> { - pub fn addr(&self) -> u64 { - { self.addr }.to_native() + pub fn null(level: BlockLevel) -> Self { + Self { + addr: BlockAddr::null(level).0.into(), + hash: 0.into(), + phantom: PhantomData, + } + } + + pub fn addr(&self) -> BlockAddr { + BlockAddr({ self.addr }.to_native()) } pub fn hash(&self) -> u64 { @@ -114,7 +201,7 @@ impl<T> BlockPtr<T> { } pub fn is_null(&self) -> bool { - self.addr() == 0 + self.addr().is_null() } /// Cast BlockPtr to another type @@ -169,15 +256,19 @@ impl<T> fmt::Debug for BlockPtr<T> { #[repr(packed)] pub struct BlockRaw([u8; BLOCK_SIZE as usize]); -impl Clone for BlockRaw { - fn clone(&self) -> Self { - Self(self.0) +unsafe impl BlockTrait for BlockRaw { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 == 0 { + Some(Self([0; BLOCK_SIZE as usize])) + } else { + None + } } } -impl Default for BlockRaw { - fn default() -> Self { - Self([0; BLOCK_SIZE as usize]) +impl Clone for BlockRaw { + fn clone(&self) -> Self { + Self(self.0) } } diff --git a/src/dir.rs b/src/dir.rs index 379e3b328e4baa4969a076eabc6eed70b2bb2a4c..309ac0a3a54c53f84a97456d84cacf8713138073 100644 --- a/src/dir.rs +++ b/src/dir.rs @@ -1,8 +1,7 @@ +use alloc::{boxed::Box, vec}; use core::{mem, ops, slice, str}; -use crate::{Node, TreePtr, BLOCK_SIZE}; - -const DIR_LIST_ENTRIES: usize = BLOCK_SIZE as usize / mem::size_of::<DirEntry>(); +use crate::{BlockLevel, BlockTrait, Node, TreePtr, RECORD_LEVEL}; #[repr(packed)] pub struct DirEntry { @@ -60,9 +59,22 @@ impl Default for DirEntry { } } -#[repr(packed)] +//TODO: this is a box to prevent stack overflows pub struct DirList { - pub entries: [DirEntry; 
DIR_LIST_ENTRIES], + pub entries: Box<[DirEntry]>, +} + +unsafe impl BlockTrait for DirList { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 <= RECORD_LEVEL { + let entries = level.bytes() as usize / mem::size_of::<DirEntry>(); + Some(Self { + entries: vec![DirEntry::default(); entries].into_boxed_slice(), + }) + } else { + None + } + } } impl DirList { @@ -76,21 +88,13 @@ impl DirList { } } -impl Default for DirList { - fn default() -> Self { - Self { - entries: [DirEntry::default(); DIR_LIST_ENTRIES], - } - } -} - impl ops::Deref for DirList { type Target = [u8]; fn deref(&self) -> &[u8] { unsafe { slice::from_raw_parts( - self as *const DirList as *const u8, - mem::size_of::<DirList>(), + self.entries.as_ptr() as *const u8, + self.entries.len() * mem::size_of::<DirEntry>(), ) as &[u8] } } @@ -99,13 +103,22 @@ impl ops::Deref for DirList { impl ops::DerefMut for DirList { fn deref_mut(&mut self) -> &mut [u8] { unsafe { - slice::from_raw_parts_mut(self as *mut DirList as *mut u8, mem::size_of::<DirList>()) - as &mut [u8] + slice::from_raw_parts_mut( + self.entries.as_mut_ptr() as *mut u8, + self.entries.len() * mem::size_of::<DirEntry>(), + ) as &mut [u8] } } } #[test] fn dir_list_size_test() { - assert_eq!(mem::size_of::<DirList>(), crate::BLOCK_SIZE as usize); + use core::ops::Deref; + for level_i in 0..RECORD_LEVEL { + let level = BlockLevel(level_i); + assert_eq!( + DirList::empty(level).unwrap().deref().len(), + level.bytes() as usize + ); + } } diff --git a/src/filesystem.rs b/src/filesystem.rs index 81dc25099fc5aabe63834035870336626a3ceea1..cbff4d2dc00a2dca0f756d2cacd766a72533f4a6 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -4,8 +4,8 @@ use core::mem; use syscall::error::{Error, Result, EIO, EKEYREJECTED, ENOENT, ENOKEY, ENOSPC}; use crate::{ - AllocEntry, AllocList, Allocator, BlockData, Disk, Header, Key, KeySlot, Node, Salt, - Transaction, TreeList, BLOCK_SIZE, HEADER_RING, + AllocEntry, AllocList, Allocator, BlockAddr, BlockData, BlockLevel, BlockTrait, Disk, Header, + Key, KeySlot, Node, Salt, Transaction, TreeList, BLOCK_SIZE, HEADER_RING, }; /// A file system @@ -169,9 +169,15 @@ impl<D: Disk> FileSystem<D> { // Set tree and alloc pointers and write header generation one fs.tx(|tx| unsafe { - let tree = BlockData::new(HEADER_RING + 1, TreeList::default()); + let tree = BlockData::new( + BlockAddr::new(HEADER_RING + 1, BlockLevel::default()), + TreeList::empty(BlockLevel::default()).unwrap(), + ); - let mut alloc = BlockData::new(HEADER_RING + 2, AllocList::default()); + let mut alloc = BlockData::new( + BlockAddr::new(HEADER_RING + 2, BlockLevel::default()), + AllocList::empty(BlockLevel::default()).unwrap(), + ); let alloc_free = size / BLOCK_SIZE - (block_offset + HEADER_RING + 4); alloc.data_mut().entries[0] = AllocEntry::new(HEADER_RING + 4, alloc_free as i64); @@ -188,7 +194,7 @@ impl<D: Disk> FileSystem<D> { fs.tx(|tx| unsafe { let mut root = BlockData::new( - HEADER_RING + 3, + BlockAddr::new(HEADER_RING + 3, BlockLevel::default()), Node::new(Node::MODE_DIR | 0o755, 0, 0, ctime, ctime_nsec), ); root.data_mut().set_links(1); @@ -206,7 +212,7 @@ impl<D: Disk> FileSystem<D> { } } - /// Start a filesystem transaction, required for making any changes + /// start a filesystem transaction, required for making any changes pub fn tx<F: FnOnce(&mut Transaction<D>) -> Result<T>, T>(&mut self, f: F) -> Result<T> { let mut tx = Transaction::new(self); let t = f(&mut tx)?; @@ -241,19 +247,18 @@ impl<D: Disk> FileSystem<D> { for alloc in allocs { for 
entry in alloc.data().entries.iter() { - let addr = entry.addr(); + let index = entry.index(); let count = entry.count(); if count < 0 { for i in 0..-count { //TODO: replace assert with error? - assert_eq!( - self.allocator.allocate_exact(addr + i as u64), - Some(addr + i as u64) - ); + let addr = BlockAddr::new(index + i as u64, BlockLevel::default()); + assert_eq!(self.allocator.allocate_exact(addr), Some(addr)); } } else { for i in 0..count { - self.allocator.deallocate(addr + i as u64, 0); + let addr = BlockAddr::new(index + i as u64, BlockLevel::default()); + self.allocator.deallocate(addr); } } } diff --git a/src/lib.rs b/src/lib.rs index 9159ad16a8de62665c8b2fd9e239f36b6ccd9bc5..b6552c9e58b0a82bf7196f0d6c80795fcf92997d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,14 +9,19 @@ extern crate alloc; use core::sync::atomic::AtomicUsize; pub const BLOCK_SIZE: u64 = 4096; +// A record is 4KiB << 5 = 128KiB +pub const RECORD_LEVEL: usize = 5; +pub const RECORD_SIZE: u64 = BLOCK_SIZE << RECORD_LEVEL; pub const SIGNATURE: &[u8; 8] = b"RedoxFS\0"; -pub const VERSION: u64 = 5; +pub const VERSION: u64 = 6; pub static IS_UMT: AtomicUsize = AtomicUsize::new(0); pub use self::allocator::{AllocEntry, AllocList, Allocator, ALLOC_LIST_ENTRIES}; #[cfg(feature = "std")] pub use self::archive::{archive, archive_at}; -pub use self::block::{BlockData, BlockList, BlockPtr, BlockRaw}; +pub use self::block::{ + BlockAddr, BlockData, BlockLevel, BlockList, BlockPtr, BlockRaw, BlockTrait, +}; pub use self::dir::{DirEntry, DirList}; pub use self::disk::*; pub use self::filesystem::FileSystem; @@ -25,6 +30,7 @@ pub use self::key::{Key, KeySlot, Salt}; #[cfg(feature = "std")] pub use self::mount::mount; pub use self::node::{Node, NodeLevel}; +pub use self::record::RecordRaw; pub use self::transaction::Transaction; pub use self::tree::{Tree, TreeData, TreeList, TreePtr}; #[cfg(feature = "std")] @@ -42,6 +48,7 @@ mod key; #[cfg(feature = "std")] mod mount; mod node; +mod record; mod transaction; mod tree; #[cfg(feature = "std")] diff --git a/src/node.rs b/src/node.rs index 59f6b2be74a87534d34a9a68d3179ebdd0528c87..1b37eacf0bd3c6b5f80482aece56590f39b810fb 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,7 +1,7 @@ use core::{fmt, mem, ops, slice}; use simple_endian::*; -use crate::{BlockList, BlockPtr, BlockRaw, BLOCK_SIZE}; +use crate::{BlockLevel, BlockList, BlockPtr, BlockTrait, RecordRaw, BLOCK_SIZE, RECORD_LEVEL}; pub enum NodeLevel { L0(usize), @@ -12,61 +12,61 @@ pub enum NodeLevel { } impl NodeLevel { - // Warning: this uses constant block offsets, make sure to sync with Node - pub fn new(mut block_offset: u64) -> Option<Self> { + // Warning: this uses constant record offsets, make sure to sync with Node + pub fn new(mut record_offset: u64) -> Option<Self> { // 1 << 8 = 256, this is the number of entries in a BlockList const SHIFT: u64 = 8; const NUM: u64 = 1 << SHIFT; const MASK: u64 = NUM - 1; const L0: u64 = 128; - if block_offset < L0 { - return Some(Self::L0((block_offset & MASK) as usize)); + if record_offset < L0 { + return Some(Self::L0((record_offset & MASK) as usize)); } else { - block_offset -= L0; + record_offset -= L0; } const L1: u64 = 64 * NUM; - if block_offset < L1 { + if record_offset < L1 { return Some(Self::L1( - ((block_offset >> SHIFT) & MASK) as usize, - (block_offset & MASK) as usize, + ((record_offset >> SHIFT) & MASK) as usize, + (record_offset & MASK) as usize, )); } else { - block_offset -= L1; + record_offset -= L1; } const L2: u64 = 32 * NUM * NUM; - if block_offset < L2 { + if 
record_offset < L2 { return Some(Self::L2( - ((block_offset >> (2 * SHIFT)) & MASK) as usize, - ((block_offset >> SHIFT) & MASK) as usize, - (block_offset & MASK) as usize, + ((record_offset >> (2 * SHIFT)) & MASK) as usize, + ((record_offset >> SHIFT) & MASK) as usize, + (record_offset & MASK) as usize, )); } else { - block_offset -= L2; + record_offset -= L2; } const L3: u64 = 16 * NUM * NUM * NUM; - if block_offset < L3 { + if record_offset < L3 { return Some(Self::L3( - ((block_offset >> (3 * SHIFT)) & MASK) as usize, - ((block_offset >> (2 * SHIFT)) & MASK) as usize, - ((block_offset >> SHIFT) & MASK) as usize, - (block_offset & MASK) as usize, + ((record_offset >> (3 * SHIFT)) & MASK) as usize, + ((record_offset >> (2 * SHIFT)) & MASK) as usize, + ((record_offset >> SHIFT) & MASK) as usize, + (record_offset & MASK) as usize, )); } else { - block_offset -= L3; + record_offset -= L3; } const L4: u64 = 12 * NUM * NUM * NUM * NUM; - if block_offset < L4 { + if record_offset < L4 { Some(Self::L4( - ((block_offset >> (4 * SHIFT)) & MASK) as usize, - ((block_offset >> (3 * SHIFT)) & MASK) as usize, - ((block_offset >> (2 * SHIFT)) & MASK) as usize, - ((block_offset >> SHIFT) & MASK) as usize, - (block_offset & MASK) as usize, + ((record_offset >> (4 * SHIFT)) & MASK) as usize, + ((record_offset >> (3 * SHIFT)) & MASK) as usize, + ((record_offset >> (2 * SHIFT)) & MASK) as usize, + ((record_offset >> SHIFT) & MASK) as usize, + (record_offset & MASK) as usize, )) } else { None @@ -74,7 +74,7 @@ impl NodeLevel { } } -type BlockListL1 = BlockList<BlockRaw>; +type BlockListL1 = BlockList<RecordRaw>; type BlockListL2 = BlockList<BlockListL1>; type BlockListL3 = BlockList<BlockListL2>; type BlockListL4 = BlockList<BlockListL3>; @@ -93,19 +93,30 @@ pub struct Node { pub mtime_nsec: u32le, pub atime: u64le, pub atime_nsec: u32le, - pub padding: [u8; BLOCK_SIZE as usize - 4090], - // 128 * BLOCK_SIZE (512 KiB, 4 KiB each) - pub level0: [BlockPtr<BlockRaw>; 128], - // 64 * 256 * BLOCK_SIZE (64 MiB, 1 MiB each) + pub record_level: u32le, + pub padding: [u8; BLOCK_SIZE as usize - 4094], + // 128 * RECORD_SIZE (16 MiB, 128 KiB each) + pub level0: [BlockPtr<RecordRaw>; 128], + // 64 * 256 * RECORD_SIZE (2 GiB, 32 MiB each) pub level1: [BlockPtr<BlockListL1>; 64], - // 32 * 256 * 256 * BLOCK_SIZE (8 GiB, 256 MiB each) + // 32 * 256 * 256 * RECORD_SIZE (256 GiB, 8 GiB each) pub level2: [BlockPtr<BlockListL2>; 32], - // 16 * 256 * 256 * 256 * BLOCK_SIZE (1 TiB, 64 GiB each) + // 16 * 256 * 256 * 256 * RECORD_SIZE (32 TiB, 2 TiB each) pub level3: [BlockPtr<BlockListL3>; 16], - // 12 * 256 * 256 * 256 * 256 * BLOCK_SIZE (192 TiB, 16 TiB each) + // 12 * 256 * 256 * 256 * 256 * RECORD_SIZE (6 PiB, 512 TiB each) pub level4: [BlockPtr<BlockListL4>; 12], } +unsafe impl BlockTrait for Node { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 == 0 { + Some(Self::default()) + } else { + None + } + } +} + impl Default for Node { fn default() -> Self { Self { @@ -120,7 +131,8 @@ impl Default for Node { mtime_nsec: 0.into(), atime: 0.into(), atime_nsec: 0.into(), - padding: [0; BLOCK_SIZE as usize - 4090], + record_level: 0.into(), + padding: [0; BLOCK_SIZE as usize - 4094], level0: [BlockPtr::default(); 128], level1: [BlockPtr::default(); 64], level2: [BlockPtr::default(); 32], @@ -153,6 +165,14 @@ impl Node { mtime_nsec: ctime_nsec.into(), atime: ctime.into(), atime_nsec: ctime_nsec.into(), + record_level: if mode & Self::MODE_TYPE == Self::MODE_FILE { + // Files take on record level + RECORD_LEVEL as u32 + } 
else { + // Folders do not + 0 + } + .into(), ..Default::default() } } @@ -189,6 +209,10 @@ impl Node { ({ self.atime }.to_native(), { self.atime_nsec }.to_native()) } + pub fn record_level(&self) -> BlockLevel { + BlockLevel({ self.record_level }.to_native() as usize) + } + pub fn set_mode(&mut self, mode: u16) { self.mode = mode.into(); } diff --git a/src/record.rs b/src/record.rs new file mode 100644 index 0000000000000000000000000000000000000000..6a9556b89d44df542ba6bdf89474099940e6ace4 --- /dev/null +++ b/src/record.rs @@ -0,0 +1,47 @@ +use alloc::{boxed::Box, vec}; +use core::ops; + +use crate::{BlockLevel, BlockTrait, RECORD_LEVEL}; + +//TODO: this is a box to prevent stack overflows +pub struct RecordRaw(Box<[u8]>); + +unsafe impl BlockTrait for RecordRaw { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 <= RECORD_LEVEL { + Some(Self(vec![0; level.bytes() as usize].into_boxed_slice())) + } else { + None + } + } +} + +impl Clone for RecordRaw { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +impl ops::Deref for RecordRaw { + type Target = [u8]; + fn deref(&self) -> &[u8] { + &self.0 + } +} + +impl ops::DerefMut for RecordRaw { + fn deref_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +#[test] +fn record_raw_size_test() { + for level_i in 0..RECORD_LEVEL { + let level = BlockLevel(level_i); + assert_eq!( + RecordRaw::empty(level).unwrap().len(), + level.bytes() as usize + ); + } +} diff --git a/src/transaction.rs b/src/transaction.rs index 7cf87645cdbecf20f9527b6593b797b3376534b7..36a2afa522f5ceac952b4cdc7ea0a3165b2897dd 100644 --- a/src/transaction.rs +++ b/src/transaction.rs @@ -1,4 +1,5 @@ use alloc::{ + boxed::Box, collections::{BTreeMap, VecDeque}, vec::Vec, }; @@ -12,9 +13,9 @@ use syscall::error::{ }; use crate::{ - AllocEntry, AllocList, Allocator, BlockData, BlockPtr, BlockRaw, DirEntry, DirList, Disk, - FileSystem, Header, Node, NodeLevel, TreeData, TreePtr, ALLOC_LIST_ENTRIES, BLOCK_SIZE, - HEADER_RING, + AllocEntry, AllocList, Allocator, BlockAddr, BlockData, BlockLevel, BlockPtr, BlockTrait, + DirEntry, DirList, Disk, FileSystem, Header, Node, NodeLevel, RecordRaw, TreeData, TreePtr, + ALLOC_LIST_ENTRIES, HEADER_RING, }; pub struct Transaction<'a, D: Disk> { @@ -25,8 +26,8 @@ pub struct Transaction<'a, D: Disk> { pub header_changed: bool, allocator: Allocator, allocator_log: VecDeque<AllocEntry>, - deallocate: Vec<u64>, - write_cache: BTreeMap<u64, BlockRaw>, + deallocate: Vec<BlockAddr>, + write_cache: BTreeMap<BlockAddr, Box<[u8]>>, } impl<'a, D: Disk> Transaction<'a, D> { @@ -52,10 +53,10 @@ impl<'a, D: Disk> Transaction<'a, D> { } // Unsafe because order must be done carefully and changes must be flushed to disk - unsafe fn allocate(&mut self) -> Result<u64> { - match self.allocator.allocate(0) { + unsafe fn allocate(&mut self, level: BlockLevel) -> Result<BlockAddr> { + match self.allocator.allocate(level) { Some(addr) => { - self.allocator_log.push_back(AllocEntry::new(addr, -1)); + self.allocator_log.push_back(AllocEntry::allocate(addr)); Ok(addr) } None => Err(Error::new(ENOSPC)), @@ -63,18 +64,19 @@ impl<'a, D: Disk> Transaction<'a, D> { } // Unsafe because order must be done carefully and changes must be flushed to disk - unsafe fn deallocate(&mut self, addr: u64) { + unsafe fn deallocate(&mut self, addr: BlockAddr) { //TODO: should we use some sort of not-null abstraction? 
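// One possible shape for that abstraction (a hypothetical sketch, not part of
// this patch): wrap BlockAddr so the non-null invariant is established once at
// construction, and the assert below becomes unnecessary:
//
//     struct NonNullBlockAddr(BlockAddr);
//
//     impl NonNullBlockAddr {
//         fn new(addr: BlockAddr) -> Option<Self> {
//             if addr.is_null() { None } else { Some(Self(addr)) }
//         }
//         fn get(self) -> BlockAddr { self.0 }
//     }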
- assert!(addr != 0); + assert!(!addr.is_null()); // Remove from write_cache if it is there, since it no longer needs to be written + //TODO: for larger blocks do we need to check for sub-blocks in here? self.write_cache.remove(&addr); // Search and remove the last matching entry in allocator_log let mut found = false; for i in (0..self.allocator_log.len()).rev() { let entry = self.allocator_log[i]; - if entry.addr() == addr && entry.count() == -1 { + if entry.index() == addr.index() && entry.count() == -addr.level().blocks() { found = true; self.allocator_log.remove(i); break; @@ -83,14 +85,14 @@ if found { // Deallocate immediately since it is an allocation that was not needed - self.allocator.deallocate(addr, 0); + self.allocator.deallocate(addr); } else { // Deallocate later when syncing filesystem, to avoid re-use self.deallocate.push(addr); } } - fn deallocate_block<T>(&mut self, ptr: BlockPtr<T>) { + fn deallocate_block<T: BlockTrait>(&mut self, ptr: BlockPtr<T>) { if !ptr.is_null() { unsafe { self.deallocate(ptr.addr()); @@ -106,20 +108,20 @@ let levels = self.allocator.levels(); for level in (0..levels.len()).rev() { let count = (1 << level) as i64; - 'addrs: for &addr in levels[level].iter() { + 'indices: for &index in levels[level].iter() { for entry in self.allocator_log.iter_mut() { - if addr + count as u64 == entry.addr() { + if index + count as u64 == entry.index() { // New entry is at start of existing entry - *entry = AllocEntry::new(addr, count + entry.count()); - continue 'addrs; + *entry = AllocEntry::new(index, count + entry.count()); + continue 'indices; - } else if entry.addr() + entry.count() as u64 == addr { + } else if entry.index() + entry.count() as u64 == index { // New entry is at end of existing entry - *entry = AllocEntry::new(entry.addr(), entry.count() + count); - continue 'addrs; + *entry = AllocEntry::new(entry.index(), entry.count() + count); + continue 'indices; } } - self.allocator_log.push_back(AllocEntry::new(addr, count)); + self.allocator_log.push_back(AllocEntry::new(index, count)); } } @@ -158,18 +160,18 @@ while new_blocks.len() * ALLOC_LIST_ENTRIES <= self.allocator_log.len() + self.deallocate.len() { - new_blocks.push(unsafe { self.allocate()? }); + new_blocks.push(unsafe { self.allocate(BlockLevel::default())? }); } // De-allocate old blocks (after allocation to prevent re-use) //TODO: optimize allocator log in memory while let Some(addr) = self.deallocate.pop() { - self.allocator.deallocate(addr, 0); - self.allocator_log.push_back(AllocEntry::new(addr, 1)); + self.allocator.deallocate(addr); + self.allocator_log.push_back(AllocEntry::deallocate(addr)); } for new_block in new_blocks { - let mut alloc = BlockData::new(new_block, AllocList::default()); + let mut alloc = BlockData::<AllocList>::empty(new_block).unwrap(); alloc.data_mut().prev = prev_ptr; for entry in alloc.data_mut().entries.iter_mut() { if let Some(log_entry) = self.allocator_log.pop_front() { @@ -196,8 +198,8 @@ for (addr, raw) in self.write_cache.iter_mut() { assert!(self.header_changed); self.fs.encrypt(raw); - let count = unsafe { self.fs.disk.write_at(self.fs.block + addr, raw)? }; - if count != mem::size_of::<BlockRaw>() { + let count = unsafe { self.fs.disk.write_at(self.fs.block + addr.index(), raw)? 
}; + if count != raw.len() { // Read wrong number of bytes #[cfg(feature = "log")] log::error!("SYNC WRITE_CACHE: WRONG NUMBER OF BYTES"); @@ -231,7 +233,7 @@ impl<'a, D: Disk> Transaction<'a, D> { Ok(true) } - pub fn read_block<T: Default + DerefMut<Target = [u8]>>( + pub fn read_block<T: BlockTrait + DerefMut<Target = [u8]>>( &mut self, ptr: BlockPtr<T>, ) -> Result<BlockData<T>> { @@ -242,16 +244,23 @@ impl<'a, D: Disk> Transaction<'a, D> { return Err(Error::new(ENOENT)); } - let mut data = T::default(); + let mut data = match T::empty(ptr.addr().level()) { + Some(some) => some, + None => { + #[cfg(feature = "log")] + log::error!("READ_BLOCK: INVALID BLOCK LEVEL FOR TYPE"); + return Err(Error::new(ENOENT)); + } + }; if let Some(raw) = self.write_cache.get(&ptr.addr()) { data.copy_from_slice(raw); } else { let count = unsafe { self.fs .disk - .read_at(self.fs.block + ptr.addr(), &mut data)? + .read_at(self.fs.block + ptr.addr().index(), &mut data)? }; - if count != mem::size_of::<T>() { + if count != data.len() { // Read wrong number of bytes #[cfg(feature = "log")] log::error!("READ_BLOCK: WRONG NUMBER OF BYTES"); @@ -266,10 +275,10 @@ impl<'a, D: Disk> Transaction<'a, D> { // Incorrect hash #[cfg(feature = "log")] log::error!( - "READ_BLOCK: INCORRECT HASH {} != {} for block {}", + "READ_BLOCK: INCORRECT HASH 0x{:X} != 0x{:X} for block 0x{:X}", block_ptr.hash(), ptr.hash(), - ptr.addr() + ptr.addr().index() ); return Err(Error::new(EIO)); } @@ -280,27 +289,65 @@ impl<'a, D: Disk> Transaction<'a, D> { /// /// # Safety /// Unsafe because it creates strange BlockData types that must be swapped before use - unsafe fn read_block_or_default<T: Default + DerefMut<Target = [u8]>>( + unsafe fn read_block_or_empty<T: BlockTrait + DerefMut<Target = [u8]>>( &mut self, ptr: BlockPtr<T>, ) -> Result<BlockData<T>> { if ptr.is_null() { - Ok(BlockData::new(0, T::default())) + match T::empty(ptr.addr().level()) { + Some(empty) => Ok(BlockData::new(BlockAddr::default(), empty)), + None => { + #[cfg(feature = "log")] + log::error!("READ_BLOCK_OR_EMPTY: INVALID BLOCK LEVEL FOR TYPE"); + Err(Error::new(ENOENT)) + } + } } else { self.read_block(ptr) } } + unsafe fn read_record<T: BlockTrait + DerefMut<Target = [u8]>>( + &mut self, + ptr: BlockPtr<T>, + level: BlockLevel, + ) -> Result<BlockData<T>> { + let record = unsafe { self.read_block_or_empty(ptr)? }; + if record.addr().level() == level { + return Ok(record); + } + + // Resize record if needed + let (old_addr, old_raw) = unsafe { record.into_parts() }; + if !old_addr.is_null() { + unsafe { + self.deallocate(old_addr); + } + } + let mut raw = match T::empty(level) { + Some(empty) => empty, + None => { + #[cfg(feature = "log")] + log::error!("READ_RECORD: INVALID BLOCK LEVEL FOR TYPE"); + return Err(Error::new(ENOENT)); + } + }; + let len = min(raw.len(), old_raw.len()); + raw[..len].copy_from_slice(&old_raw[..len]); + Ok(BlockData::new(BlockAddr::null(level), raw)) + } + /// Write block data to a new address, returning new address - pub fn sync_block<T: Deref<Target = [u8]>>( + pub fn sync_block<T: BlockTrait + Deref<Target = [u8]>>( &mut self, mut block: BlockData<T>, ) -> Result<BlockPtr<T>> { // Swap block to new address - let old_addr = block.swap_addr(unsafe { self.allocate()? }); + let level = block.addr().level(); + let old_addr = block.swap_addr(unsafe { self.allocate(level)? 
}); // Deallocate old address (will only take effect after sync_allocator, which helps to // prevent re-use before a new header is written - if old_addr != 0 { + if !old_addr.is_null() { unsafe { self.deallocate(old_addr); } @@ -313,26 +360,27 @@ impl<'a, D: Disk> Transaction<'a, D> { /// /// # Safety /// Unsafe to encourage CoW semantics - pub(crate) unsafe fn write_block<T: Deref<Target = [u8]>>( + pub(crate) unsafe fn write_block<T: BlockTrait + Deref<Target = [u8]>>( &mut self, block: BlockData<T>, ) -> Result<BlockPtr<T>> { - if block.addr() == 0 { + if block.addr().is_null() { // Pointer is invalid #[cfg(feature = "log")] log::error!("WRITE_BLOCK: POINTER IS NULL"); return Err(Error::new(ENOENT)); } - //TODO: transmute? - let mut raw = BlockRaw::default(); - raw.copy_from_slice(block.data()); - self.write_cache.insert(block.addr(), raw); + //TODO: do not convert to boxed slice if it already is one + self.write_cache.insert( + block.addr(), + block.data().deref().to_vec().into_boxed_slice(), + ); Ok(block.create_ptr()) } - pub fn read_tree<T: Default + DerefMut<Target = [u8]>>( + pub fn read_tree<T: BlockTrait + DerefMut<Target = [u8]>>( &mut self, ptr: TreePtr<T>, ) -> Result<TreeData<T>> { @@ -351,7 +399,14 @@ impl<'a, D: Disk> Transaction<'a, D> { let raw = self.read_block(l0.data().ptrs[i0])?; //TODO: transmute instead of copy? - let mut data = T::default(); + let mut data = match T::empty(BlockLevel::default()) { + Some(some) => some, + None => { + #[cfg(feature = "log")] + log::error!("READ_TREE: INVALID BLOCK LEVEL FOR TYPE"); + return Err(Error::new(ENOENT)); + } + }; data.copy_from_slice(raw.data()); Ok(TreeData::new(ptr.id(), data)) @@ -368,11 +423,11 @@ impl<'a, D: Disk> Transaction<'a, D> { unsafe { let mut l3 = self.read_block(self.header.tree)?; for i3 in 0..l3.data().ptrs.len() { - let mut l2 = self.read_block_or_default(l3.data().ptrs[i3])?; + let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?; for i2 in 0..l2.data().ptrs.len() { - let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?; + let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; for i1 in 0..l1.data().ptrs.len() { - let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; for i0 in 0..l0.data().ptrs.len() { let pn = l0.data().ptrs[i0]; @@ -456,11 +511,12 @@ impl<'a, D: Disk> Transaction<'a, D> { children: &mut Vec<DirEntry>, ) -> Result<()> { let parent = self.read_tree(parent_ptr)?; - for block_offset in 0..(parent.data().size() / BLOCK_SIZE) { - let block_ptr = self.node_block_ptr(&parent, block_offset)?; + let record_level = parent.data().record_level(); + for record_offset in 0..(parent.data().size() / record_level.bytes()) { + let block_ptr = self.node_record_ptr(&parent, record_offset)?; let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() }; let dir = self.read_block(dir_ptr)?; - for entry in dir.data().entries { + for entry in dir.data().entries.iter() { let node_ptr = entry.node_ptr(); // Skip empty entries @@ -468,7 +524,7 @@ impl<'a, D: Disk> Transaction<'a, D> { continue; } - children.push(entry); + children.push(*entry); } } @@ -478,11 +534,12 @@ impl<'a, D: Disk> Transaction<'a, D> { //TODO: improve performance (h-tree?) 
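// Note on the h-tree TODO above: find_node below is a linear scan - it walks
// every DirList record of the parent and compares names entry by entry, so
// lookup cost grows with directory size. A usage sketch (hypothetical caller
// code, assuming an open FileSystem `fs`):
//
//     let init = fs.tx(|tx| tx.find_node(TreePtr::root(), "init"))?;
//
// find_node returns ENOENT when no entry in any record matches the name.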
pub fn find_node(&mut self, parent_ptr: TreePtr<Node>, name: &str) -> Result<TreeData<Node>> { let parent = self.read_tree(parent_ptr)?; - for block_offset in 0..(parent.data().size() / BLOCK_SIZE) { - let block_ptr = self.node_block_ptr(&parent, block_offset)?; + let record_level = parent.data().record_level(); + for block_offset in 0..(parent.data().size() / record_level.bytes()) { + let block_ptr = self.node_record_ptr(&parent, block_offset)?; let dir_ptr: BlockPtr<DirList> = unsafe { block_ptr.cast() }; let dir = self.read_block(dir_ptr)?; - for entry in dir.data().entries { + for entry in dir.data().entries.iter() { let node_ptr = entry.node_ptr(); // Skip empty entries @@ -523,7 +580,7 @@ impl<'a, D: Disk> Transaction<'a, D> { unsafe { let parent = self.read_tree(parent_ptr)?; let node_block_data = BlockData::new( - self.allocate()?, + self.allocate(BlockLevel::default())?, Node::new( mode, parent.data().uid(), @@ -564,10 +621,11 @@ impl<'a, D: Disk> Transaction<'a, D> { let entry = DirEntry::new(node_ptr, name).ok_or(Error::new(EINVAL))?; - let block_end = parent.data().size() / BLOCK_SIZE; - for block_offset in 0..block_end { - let mut dir_block_ptr = self.node_block_ptr(&parent, block_offset)?; - let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_block_ptr.cast() }; + let record_level = parent.data().record_level(); + let record_end = parent.data().size() / record_level.bytes(); + for record_offset in 0..record_end { + let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?; + let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() }; let mut dir = self.read_block(dir_ptr)?; let mut dir_changed = false; for old_entry in dir.data_mut().entries.iter_mut() { @@ -582,9 +640,9 @@ impl<'a, D: Disk> Transaction<'a, D> { } if dir_changed { dir_ptr = self.sync_block(dir)?; - dir_block_ptr = unsafe { dir_ptr.cast() }; + dir_record_ptr = unsafe { dir_ptr.cast() }; - self.sync_node_block_ptr(&mut parent, block_offset, dir_block_ptr)?; + self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?; self.sync_trees(&[parent, node])?; return Ok(()); @@ -592,13 +650,16 @@ impl<'a, D: Disk> Transaction<'a, D> { } // Append a new dirlist, with first entry set to new entry - let mut dir = BlockData::new(unsafe { self.allocate()? }, DirList::default()); + let mut dir = + BlockData::<DirList>::empty(unsafe { self.allocate(BlockLevel::default())? }).unwrap(); dir.data_mut().entries[0] = entry; let dir_ptr = unsafe { self.write_block(dir)? 
}; - let dir_block_ptr: BlockPtr<BlockRaw> = unsafe { dir_ptr.cast() }; + let dir_record_ptr = unsafe { dir_ptr.cast() }; - self.sync_node_block_ptr(&mut parent, block_end, dir_block_ptr)?; - parent.data_mut().set_size((block_end + 1) * BLOCK_SIZE); + self.sync_node_record_ptr(&mut parent, record_end, dir_record_ptr)?; + parent + .data_mut() + .set_size((record_end + 1) * record_level.bytes()); self.sync_trees(&[parent, node])?; Ok(()) @@ -606,10 +667,11 @@ impl<'a, D: Disk> Transaction<'a, D> { pub fn remove_node(&mut self, parent_ptr: TreePtr<Node>, name: &str, mode: u16) -> Result<()> { let mut parent = self.read_tree(parent_ptr)?; - let blocks = parent.data().size() / BLOCK_SIZE; - for block_offset in 0..blocks { - let mut dir_block_ptr = self.node_block_ptr(&parent, block_offset)?; - let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_block_ptr.cast() }; + let record_level = parent.data().record_level(); + let records = parent.data().size() / record_level.bytes(); + for record_offset in 0..records { + let mut dir_record_ptr = self.node_record_ptr(&parent, record_offset)?; + let mut dir_ptr: BlockPtr<DirList> = unsafe { dir_record_ptr.cast() }; let mut dir = self.read_block(dir_ptr)?; let mut node_opt = None; for entry in dir.data_mut().entries.iter_mut() { @@ -658,15 +720,32 @@ impl<'a, D: Disk> Transaction<'a, D> { self.truncate_node_inner(&mut node, 0)?; } - if block_offset == blocks - 1 && dir.data().is_empty() { - // Remove empty parent block, if it is at the end - self.remove_node_block_ptr(&mut parent, block_offset)?; - parent.data_mut().set_size(block_offset * BLOCK_SIZE); + if record_offset == records - 1 && dir.data().is_empty() { + let mut remove_record = record_offset; + loop { + // Remove empty parent record, if it is at the end + self.remove_node_record_ptr(&mut parent, remove_record)?; + parent + .data_mut() + .set_size(remove_record * record_level.bytes()); + + // Keep going for any other empty records + if remove_record > 0 { + remove_record -= 1; + dir_record_ptr = self.node_record_ptr(&parent, remove_record)?; + dir_ptr = unsafe { dir_record_ptr.cast() }; + dir = self.read_block(dir_ptr)?; + if dir.data().is_empty() { + continue; + } + } + break; + } } else { - // Save new parent block + // Save new parent record dir_ptr = self.sync_block(dir)?; - dir_block_ptr = unsafe { dir_ptr.cast() }; - self.sync_node_block_ptr(&mut parent, block_offset, dir_block_ptr)?; + dir_record_ptr = unsafe { dir_ptr.cast() }; + self.sync_node_record_ptr(&mut parent, record_offset, dir_record_ptr)?; } // Sync both parent and node at the same time @@ -716,153 +795,157 @@ impl<'a, D: Disk> Transaction<'a, D> { Ok(()) } - fn node_block_ptr( + fn node_record_ptr( &mut self, node: &TreeData<Node>, - block_offset: u64, - ) -> Result<BlockPtr<BlockRaw>> { - match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? 
{ - NodeLevel::L0(i0) => Ok(node.data().level0[i0]), - NodeLevel::L1(i1, i0) => { - let l0 = self.read_block(node.data().level1[i1])?; - Ok(l0.data().ptrs[i0]) - } - NodeLevel::L2(i2, i1, i0) => { - let l1 = self.read_block(node.data().level2[i2])?; - let l0 = self.read_block(l1.data().ptrs[i1])?; - Ok(l0.data().ptrs[i0]) - } - NodeLevel::L3(i3, i2, i1, i0) => { - let l2 = self.read_block(node.data().level3[i3])?; - let l1 = self.read_block(l2.data().ptrs[i2])?; - let l0 = self.read_block(l1.data().ptrs[i1])?; - Ok(l0.data().ptrs[i0]) - } - NodeLevel::L4(i4, i3, i2, i1, i0) => { - let l3 = self.read_block(node.data().level4[i4])?; - let l2 = self.read_block(l3.data().ptrs[i3])?; - let l1 = self.read_block(l2.data().ptrs[i2])?; - let l0 = self.read_block(l1.data().ptrs[i1])?; - Ok(l0.data().ptrs[i0]) + record_offset: u64, + ) -> Result<BlockPtr<RecordRaw>> { + unsafe { + match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? { + NodeLevel::L0(i0) => Ok(node.data().level0[i0]), + NodeLevel::L1(i1, i0) => { + let l0 = self.read_block_or_empty(node.data().level1[i1])?; + Ok(l0.data().ptrs[i0]) + } + NodeLevel::L2(i2, i1, i0) => { + let l1 = self.read_block_or_empty(node.data().level2[i2])?; + let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; + Ok(l0.data().ptrs[i0]) + } + NodeLevel::L3(i3, i2, i1, i0) => { + let l2 = self.read_block_or_empty(node.data().level3[i3])?; + let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; + Ok(l0.data().ptrs[i0]) + } + NodeLevel::L4(i4, i3, i2, i1, i0) => { + let l3 = self.read_block_or_empty(node.data().level4[i4])?; + let l2 = self.read_block_or_empty(l3.data().ptrs[i3])?; + let l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; + Ok(l0.data().ptrs[i0]) + } } } } - fn remove_node_block_ptr( + fn remove_node_record_ptr( &mut self, node: &mut TreeData<Node>, - block_offset: u64, + record_offset: u64, ) -> Result<()> { - match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? 
{ - NodeLevel::L0(i0) => { - self.deallocate_block(node.data_mut().level0[i0].clear()); - } - NodeLevel::L1(i1, i0) => { - let mut l0 = self.read_block(node.data().level1[i1])?; - self.deallocate_block(l0.data_mut().ptrs[i0].clear()); - if l0.data().is_empty() { - self.deallocate_block(node.data_mut().level1[i1].clear()); - } else { - node.data_mut().level1[i1] = self.sync_block(l0)?; - } - } - NodeLevel::L2(i2, i1, i0) => { - let mut l1 = self.read_block(node.data().level2[i2])?; - let mut l0 = self.read_block(l1.data().ptrs[i1])?; - self.deallocate_block(l0.data_mut().ptrs[i0].clear()); - if l0.data().is_empty() { - self.deallocate_block(l1.data_mut().ptrs[i1].clear()); - } else { - l1.data_mut().ptrs[i1] = self.sync_block(l0)?; - } - if l1.data().is_empty() { - self.deallocate_block(node.data_mut().level2[i2].clear()); - } else { - node.data_mut().level2[i2] = self.sync_block(l1)?; - } - } - NodeLevel::L3(i3, i2, i1, i0) => { - let mut l2 = self.read_block(node.data().level3[i3])?; - let mut l1 = self.read_block(l2.data().ptrs[i2])?; - let mut l0 = self.read_block(l1.data().ptrs[i1])?; - self.deallocate_block(l0.data_mut().ptrs[i0].clear()); - if l0.data().is_empty() { - self.deallocate_block(l1.data_mut().ptrs[i1].clear()); - } else { - l1.data_mut().ptrs[i1] = self.sync_block(l0)?; - } - if l1.data().is_empty() { - self.deallocate_block(l2.data_mut().ptrs[i2].clear()); - } else { - l2.data_mut().ptrs[i2] = self.sync_block(l1)?; - } - if l2.data().is_empty() { - self.deallocate_block(node.data_mut().level3[i3].clear()); - } else { - node.data_mut().level3[i3] = self.sync_block(l2)?; + unsafe { + match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? { + NodeLevel::L0(i0) => { + self.deallocate_block(node.data_mut().level0[i0].clear()); } - } - NodeLevel::L4(i4, i3, i2, i1, i0) => { - let mut l3 = self.read_block(node.data().level4[i4])?; - let mut l2 = self.read_block(l3.data().ptrs[i3])?; - let mut l1 = self.read_block(l2.data().ptrs[i2])?; - let mut l0 = self.read_block(l1.data().ptrs[i1])?; - self.deallocate_block(l0.data_mut().ptrs[i0].clear()); - if l0.data().is_empty() { - self.deallocate_block(l1.data_mut().ptrs[i1].clear()); - } else { - l1.data_mut().ptrs[i1] = self.sync_block(l0)?; + NodeLevel::L1(i1, i0) => { + let mut l0 = self.read_block_or_empty(node.data().level1[i1])?; + self.deallocate_block(l0.data_mut().ptrs[i0].clear()); + if l0.data().is_empty() { + self.deallocate_block(node.data_mut().level1[i1].clear()); + } else { + node.data_mut().level1[i1] = self.sync_block(l0)?; + } } - if l1.data().is_empty() { - self.deallocate_block(l2.data_mut().ptrs[i2].clear()); - } else { - l2.data_mut().ptrs[i2] = self.sync_block(l1)?; + NodeLevel::L2(i2, i1, i0) => { + let mut l1 = self.read_block_or_empty(node.data().level2[i2])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; + self.deallocate_block(l0.data_mut().ptrs[i0].clear()); + if l0.data().is_empty() { + self.deallocate_block(l1.data_mut().ptrs[i1].clear()); + } else { + l1.data_mut().ptrs[i1] = self.sync_block(l0)?; + } + if l1.data().is_empty() { + self.deallocate_block(node.data_mut().level2[i2].clear()); + } else { + node.data_mut().level2[i2] = self.sync_block(l1)?; + } } - if l2.data().is_empty() { - self.deallocate_block(l3.data_mut().ptrs[i3].clear()); - } else { - l3.data_mut().ptrs[i3] = self.sync_block(l2)?; + NodeLevel::L3(i3, i2, i1, i0) => { + let mut l2 = self.read_block_or_empty(node.data().level3[i3])?; + let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let mut l0 = 
self.read_block_or_empty(l1.data().ptrs[i1])?; + self.deallocate_block(l0.data_mut().ptrs[i0].clear()); + if l0.data().is_empty() { + self.deallocate_block(l1.data_mut().ptrs[i1].clear()); + } else { + l1.data_mut().ptrs[i1] = self.sync_block(l0)?; + } + if l1.data().is_empty() { + self.deallocate_block(l2.data_mut().ptrs[i2].clear()); + } else { + l2.data_mut().ptrs[i2] = self.sync_block(l1)?; + } + if l2.data().is_empty() { + self.deallocate_block(node.data_mut().level3[i3].clear()); + } else { + node.data_mut().level3[i3] = self.sync_block(l2)?; + } } - if l3.data().is_empty() { - self.deallocate_block(node.data_mut().level4[i4].clear()); - } else { - node.data_mut().level4[i4] = self.sync_block(l3)?; + NodeLevel::L4(i4, i3, i2, i1, i0) => { + let mut l3 = self.read_block_or_empty(node.data().level4[i4])?; + let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?; + let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; + self.deallocate_block(l0.data_mut().ptrs[i0].clear()); + if l0.data().is_empty() { + self.deallocate_block(l1.data_mut().ptrs[i1].clear()); + } else { + l1.data_mut().ptrs[i1] = self.sync_block(l0)?; + } + if l1.data().is_empty() { + self.deallocate_block(l2.data_mut().ptrs[i2].clear()); + } else { + l2.data_mut().ptrs[i2] = self.sync_block(l1)?; + } + if l2.data().is_empty() { + self.deallocate_block(l3.data_mut().ptrs[i3].clear()); + } else { + l3.data_mut().ptrs[i3] = self.sync_block(l2)?; + } + if l3.data().is_empty() { + self.deallocate_block(node.data_mut().level4[i4].clear()); + } else { + node.data_mut().level4[i4] = self.sync_block(l3)?; + } } } - } - Ok(()) + Ok(()) + } } - fn sync_node_block_ptr( + fn sync_node_record_ptr( &mut self, node: &mut TreeData<Node>, - block_offset: u64, - ptr: BlockPtr<BlockRaw>, + record_offset: u64, + ptr: BlockPtr<RecordRaw>, ) -> Result<()> { unsafe { - match NodeLevel::new(block_offset).ok_or(Error::new(ERANGE))? { + match NodeLevel::new(record_offset).ok_or(Error::new(ERANGE))? 
{ NodeLevel::L0(i0) => { node.data_mut().level0[i0] = ptr; } NodeLevel::L1(i1, i0) => { - let mut l0 = self.read_block_or_default(node.data().level1[i1])?; + let mut l0 = self.read_block_or_empty(node.data().level1[i1])?; l0.data_mut().ptrs[i0] = ptr; node.data_mut().level1[i1] = self.sync_block(l0)?; } NodeLevel::L2(i2, i1, i0) => { - let mut l1 = self.read_block_or_default(node.data().level2[i2])?; - let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?; + let mut l1 = self.read_block_or_empty(node.data().level2[i2])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; l0.data_mut().ptrs[i0] = ptr; l1.data_mut().ptrs[i1] = self.sync_block(l0)?; node.data_mut().level2[i2] = self.sync_block(l1)?; } NodeLevel::L3(i3, i2, i1, i0) => { - let mut l2 = self.read_block_or_default(node.data().level3[i3])?; - let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?; - let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?; + let mut l2 = self.read_block_or_empty(node.data().level3[i3])?; + let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; l0.data_mut().ptrs[i0] = ptr; l1.data_mut().ptrs[i1] = self.sync_block(l0)?; @@ -870,10 +953,10 @@ impl<'a, D: Disk> Transaction<'a, D> { node.data_mut().level3[i3] = self.sync_block(l2)?; } NodeLevel::L4(i4, i3, i2, i1, i0) => { - let mut l3 = self.read_block_or_default(node.data().level4[i4])?; - let mut l2 = self.read_block_or_default(l3.data().ptrs[i3])?; - let mut l1 = self.read_block_or_default(l2.data().ptrs[i2])?; - let mut l0 = self.read_block_or_default(l1.data().ptrs[i1])?; + let mut l3 = self.read_block_or_empty(node.data().level4[i4])?; + let mut l2 = self.read_block_or_empty(l3.data().ptrs[i3])?; + let mut l1 = self.read_block_or_empty(l2.data().ptrs[i2])?; + let mut l0 = self.read_block_or_empty(l1.data().ptrs[i1])?; l0.data_mut().ptrs[i0] = ptr; l1.data_mut().ptrs[i1] = self.sync_block(l0)?; @@ -894,17 +977,20 @@ impl<'a, D: Disk> Transaction<'a, D> { buf: &mut [u8], ) -> Result<usize> { let node_size = node.data().size(); + let record_level = node.data().record_level(); let mut i = 0; while i < buf.len() && offset < node_size { - let block_ptr = self.node_block_ptr(node, offset / BLOCK_SIZE)?; - let block = self.read_block(block_ptr)?; - - let j = (offset % BLOCK_SIZE) as usize; + let j = (offset % record_level.bytes()) as usize; let len = min( buf.len() - i, - min(BLOCK_SIZE - j as u64, node_size - offset) as usize, + min(record_level.bytes() - j as u64, node_size - offset) as usize, ); - buf[i..i + len].copy_from_slice(&block.data()[j..j + len]); + let level = BlockLevel::for_bytes((j + len) as u64); + + let record_ptr = self.node_record_ptr(node, offset / record_level.bytes())?; + let record = unsafe { self.read_record(record_ptr, level)? 
}; + + buf[i..i + len].copy_from_slice(&record.data()[j..j + len]); i += len; offset += len as u64; @@ -944,6 +1030,7 @@ impl<'a, D: Disk> Transaction<'a, D> { pub fn truncate_node_inner(&mut self, node: &mut TreeData<Node>, size: u64) -> Result<bool> { let old_size = node.data().size(); + let record_level = node.data().record_level(); // Size already matches, return if old_size == size { @@ -952,23 +1039,32 @@ impl<'a, D: Disk> Transaction<'a, D> { if old_size < size { // If size is smaller, write zeroes until the size matches - let zeroes = [0; BLOCK_SIZE as usize]; + let zeroes = RecordRaw::empty(record_level).unwrap(); let mut offset = old_size; while offset < size { - let start = offset % BLOCK_SIZE; - let end = if offset / BLOCK_SIZE == size / BLOCK_SIZE { - size % BLOCK_SIZE + let start = offset % record_level.bytes(); + if start == 0 { + // We don't have to write completely zero records as read will interpret + // null record pointers as zero records + offset = size; + break; + } + let end = if offset / record_level.bytes() == size / record_level.bytes() { + size % record_level.bytes() } else { - BLOCK_SIZE + record_level.bytes() }; self.write_node_inner(node, &mut offset, &zeroes[start as usize..end as usize])?; } assert_eq!(offset, size); } else { - // Deallocate blocks - for block in ((size + BLOCK_SIZE - 1) / BLOCK_SIZE..old_size / BLOCK_SIZE).rev() { - self.remove_node_block_ptr(node, block)?; + // Deallocate records + for record in ((size + record_level.bytes() - 1) / record_level.bytes() + ..old_size / record_level.bytes()) + .rev() + { + self.remove_node_record_ptr(node, record)?; } } @@ -1006,32 +1102,35 @@ impl<'a, D: Disk> Transaction<'a, D> { ) -> Result<bool> { let mut node_changed = false; - let node_blocks = (node.data().size() + BLOCK_SIZE - 1) / BLOCK_SIZE; + let record_level = node.data().record_level(); + let node_records = (node.data().size() + record_level.bytes() - 1) / record_level.bytes(); let mut i = 0; while i < buf.len() { - let mut block_ptr = if node_blocks > (*offset / BLOCK_SIZE) { - self.node_block_ptr(node, *offset / BLOCK_SIZE)? + let j = (*offset % record_level.bytes()) as usize; + let len = min(buf.len() - i, record_level.bytes() as usize - j); + let level = BlockLevel::for_bytes((j + len) as u64); + + let mut record_ptr = if node_records > (*offset / record_level.bytes()) { + self.node_record_ptr(node, *offset / record_level.bytes())? } else { - BlockPtr::default() + BlockPtr::null(level) }; - let mut block = unsafe { self.read_block_or_default(block_ptr)? }; + let mut record = unsafe { self.read_record(record_ptr, level)? 
}; - let j = (*offset % BLOCK_SIZE) as usize; - let len = min(buf.len() - i, BLOCK_SIZE as usize - j); - if block_ptr.is_null() || buf[i..i + len] != block.data()[j..j + len] { + if buf[i..i + len] != record.data()[j..j + len] { unsafe { - let old_addr = block.swap_addr(self.allocate()?); + let old_addr = record.swap_addr(self.allocate(level)?); - block.data_mut()[j..j + len].copy_from_slice(&buf[i..i + len]); - block_ptr = self.write_block(block)?; + record.data_mut()[j..j + len].copy_from_slice(&buf[i..i + len]); + record_ptr = self.write_block(record)?; - if old_addr != 0 { + if !old_addr.is_null() { self.deallocate(old_addr); } } - self.sync_node_block_ptr(node, *offset / BLOCK_SIZE, block_ptr)?; + self.sync_node_record_ptr(node, *offset / record_level.bytes(), record_ptr)?; node_changed = true; } diff --git a/src/tree.rs b/src/tree.rs index 7e9468f561f37e54d60d2ca964ae19411898df15..d45b00fa4e8ee6859b781dbae41be6e16ee4832e 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,7 +1,7 @@ use core::{marker::PhantomData, mem, ops, slice}; use simple_endian::*; -use crate::{BlockPtr, BlockRaw}; +use crate::{BlockLevel, BlockPtr, BlockRaw, BlockTrait}; // 1 << 8 = 256, this is the number of entries in a TreeList const TREE_LIST_SHIFT: u32 = 8; @@ -50,10 +50,14 @@ pub struct TreeList<T> { pub ptrs: [BlockPtr<T>; TREE_LIST_ENTRIES], } -impl<T> Default for TreeList<T> { - fn default() -> Self { - Self { - ptrs: [BlockPtr::default(); TREE_LIST_ENTRIES], +unsafe impl<T> BlockTrait for TreeList<T> { + fn empty(level: BlockLevel) -> Option<Self> { + if level.0 == 0 { + Some(Self { + ptrs: [BlockPtr::default(); TREE_LIST_ENTRIES], + }) + } else { + None } } } diff --git a/test.sh b/test.sh index d75b3254d57b29e6d90d9e44483b6c7e60f882da..20d8f1ad78d9d57742d739599fcd843f5f294022 100755 --- a/test.sh +++ b/test.sh @@ -34,15 +34,22 @@ ls -lah image mkdir image/test time cp -r src image/test/src + dd if=/dev/urandom of=image/test/random bs=1M count=256 dd if=image/test/random of=/dev/null bs=1M count=256 + +time truncate --size=256M image/test/sparse +dd if=image/test/sparse of=/dev/null bs=1M count=256 + dd if=/dev/zero of=image/test/zero bs=1M count=256 dd if=image/test/zero of=/dev/null bs=1M count=256 + ls -lah image/test df -h image rm image/test/random +rm image/test/sparse rm image/test/zero rm -rf image/test/src rmdir image/test
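The buddy-style allocator above hands out blocks by level: when the requested level has no free block, it pops one from a higher level and splits it down. A test sketch of that behavior (assuming it sits inside the crate next to allocator_test, since BlockLevel's inner field and BlockAddr::new are pub(crate)):

#[test]
fn allocator_split_sketch() {
    let mut alloc = Allocator::default();

    // Free one level-1 block (two level-0 blocks) starting at index 2
    alloc.deallocate(unsafe { BlockAddr::new(2, BlockLevel(1)) });

    // A level-0 allocation splits it: the low half is returned and the
    // high half is pushed back onto the level-0 free list
    let addr = alloc.allocate(BlockLevel(0)).expect("split should succeed");
    assert_eq!(addr.index(), 2);
    assert_eq!(alloc.allocate(BlockLevel(0)).map(|a| a.index()), Some(3));
    assert_eq!(alloc.allocate(BlockLevel(0)), None);
}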
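BlockAddr itself packs the index and level into a single u64, keeping on-disk pointers at 8 bytes: the level occupies the low four bits and the index is shifted above them. A round-trip sketch under the same in-crate assumption:

#[test]
fn block_addr_packing_sketch() {
    let level = BlockLevel(3);
    let addr = unsafe { BlockAddr::new(123, level) };
    assert_eq!(addr.index(), 123);
    assert_eq!(addr.level(), level);

    // A level-3 block covers 1 << 3 = 8 blocks, i.e. BLOCK_SIZE << 3 = 32 KiB
    assert_eq!(level.blocks(), 8);
    assert_eq!(level.bytes(), 32 * 1024);

    // for_bytes rounds up to the smallest level that fits the byte count
    assert_eq!(BlockLevel::for_bytes(BLOCK_SIZE + 1), BlockLevel(1));
}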
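Finally, NodeLevel::new maps a record offset to its indirection path through a Node: the first 128 records are direct (L0), the next 64 * 256 go through one BlockList (L1), and so on up to L4. A worked example of that mapping:

#[test]
fn node_level_mapping_sketch() {
    // Offset 130 lies past the 128 direct records; 130 - 128 = 2 makes it
    // entry 2 of the first L1 BlockList
    match NodeLevel::new(130) {
        Some(NodeLevel::L1(i1, i0)) => {
            assert_eq!(i1, 0);
            assert_eq!(i0, 2);
        }
        _ => panic!("expected an L1 mapping"),
    }
}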