From 524ba45639684863e8d3dfe5813bdf46098394e7 Mon Sep 17 00:00:00 2001 From: Richard Date: Thu, 4 Jan 2024 22:11:23 +0100 Subject: [PATCH] 8 byte magic and store docs --- nullptr/commander.py | 2 +- nullptr/store.py | 14 +++++++------- store.md | 15 ++++++++++++++- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/nullptr/commander.py b/nullptr/commander.py index 4720c19..c7a8034 100644 --- a/nullptr/commander.py +++ b/nullptr/commander.py @@ -23,7 +23,7 @@ class Commander(CommandLine): self.hist_file = hist_file if os.path.isfile(hist_file): readline.read_history_file(hist_file) - self.store = Store(store_file, True) + self.store = Store(store_file, False) self.agent = self.select_agent() self.api = Api(self.store, self.agent) self.centcom = CentralCommand(self.store, self.api) diff --git a/nullptr/store.py b/nullptr/store.py index f8d219f..51788c5 100644 --- a/nullptr/store.py +++ b/nullptr/store.py @@ -24,7 +24,7 @@ class StoreUnpickler(pickle.Unpickler): return self.store raise pickle.UnpicklingError("I don know the persid!") -CHUNK_MAGIC = b'ChNk' +CHUNK_MAGIC = b'ChNkcHnK' class ChunkHeader: def __init__(self): @@ -37,12 +37,12 @@ class ChunkHeader: @classmethod def parse(cls, fil): offset = fil.tell() - d = fil.read(20) - if len(d) < 20: + d = fil.read(24) + if len(d) < 24: return None o = cls() o.offset = offset - o.magic, d, o.used = unpack('<4sQQ', d) + o.magic, d, o.used = unpack('<8sQQ', d) o.size = d & 0x7fffffffffffffff o.in_use = d & 0x8000000000000000 != 0 if o.magic != CHUNK_MAGIC: @@ -54,7 +54,7 @@ class ChunkHeader: d = self.size if self.in_use: d |= 1 << 63 - d = pack('<4sQQ', self.magic, d, self.used) + d = pack('<8sQQ', self.magic, d, self.used) f.write(d) def __repr__(self): @@ -117,13 +117,13 @@ class Store: self.p(hdr) total += hdr.size if not hdr.in_use: - print(f"skip {hdr.size} {self.fil.tell()}") + # print(f"skip {hdr.size} {self.fil.tell()}") self.fil.seek(hdr.size, 1) free += hdr.size else: data = self.fil.read(hdr.used) 
self.load_object(data, offset) - print(f"pad {hdr.size - hdr.used}") + # print(f"pad {hdr.size - hdr.used}") self.fil.seek(hdr.size - hdr.used, 1) cnt += 1 offset = self.fil.tell() diff --git a/store.md b/store.md index 733f457..90db4d9 100644 --- a/store.md +++ b/store.md @@ -46,5 +46,18 @@ Until specified otherwise, all numbers are stored low-endian 64bit unsigned. The store file is built up out of chunks. A chunk is either empty or houses exactly one file. If a file is updated and its size fits the chunk, it is updated in-place. If the new content does not fit the chunk, a new chunk is allocated at the end of the file. The old chunk is marked as empty. -A chunk starts with a chunk header. This is just a single field describing the size of the chunk in bytes, not including the header. The first bit of the field is the IN_USE flag. If it is not set, the contents of the chunk are ignored during loading. +A chunk starts with a chunk header. The header consists of three 8-byte fields. +The first field is the magic. Its value is 'ChNkcHnK'. The magic can be used to recover from a corrupted file. + +The second field describes the size of the chunk in bytes, not including the header. The most significant bit (bit 63) of the field is the IN_USE flag. If it is not set, the contents of the chunk are ignored during loading. + +The third field describes how much of the chunk is occupied by content. This is typically less than the size of the chunk because we allocate slack for each object to grow. The slack prevents frequent reallocation. + +# Future work +This format is far from perfect. + +* File corruption sometimes occurs. The cause still has to be found. +* Recovery from file corruption has not yet been implemented. +* Disk space savings are possible by eliminating slack for non-changing objects such as waypoints, and by compressing the file. +* Indices have not been implemented, although a "member" index keeps track of which objects are in each system.