8 byte magic and store docs

This commit is contained in:
Richard 2024-01-04 22:11:23 +01:00
parent 1b7a528655
commit 524ba45639
3 changed files with 22 additions and 9 deletions

View File

@ -23,7 +23,7 @@ class Commander(CommandLine):
self.hist_file = hist_file
if os.path.isfile(hist_file):
readline.read_history_file(hist_file)
self.store = Store(store_file, True)
self.store = Store(store_file, False)
self.agent = self.select_agent()
self.api = Api(self.store, self.agent)
self.centcom = CentralCommand(self.store, self.api)

View File

@ -24,7 +24,7 @@ class StoreUnpickler(pickle.Unpickler):
return self.store
raise pickle.UnpicklingError("I don know the persid!")
CHUNK_MAGIC = b'ChNk'
CHUNK_MAGIC = b'ChNkcHnK'
class ChunkHeader:
def __init__(self):
@ -37,12 +37,12 @@ class ChunkHeader:
@classmethod
def parse(cls, fil):
offset = fil.tell()
d = fil.read(20)
if len(d) < 20:
d = fil.read(24)
if len(d) < 24:
return None
o = cls()
o.offset = offset
o.magic, d, o.used = unpack('<4sQQ', d)
o.magic, d, o.used = unpack('<8sQQ', d)
o.size = d & 0x7fffffffffffffff
o.in_use = d & 0x8000000000000000 != 0
if o.magic != CHUNK_MAGIC:
@ -54,7 +54,7 @@ class ChunkHeader:
d = self.size
if self.in_use:
d |= 1 << 63
d = pack('<4sQQ', self.magic, d, self.used)
d = pack('<8sQQ', self.magic, d, self.used)
f.write(d)
def __repr__(self):
@ -117,13 +117,13 @@ class Store:
self.p(hdr)
total += hdr.size
if not hdr.in_use:
print(f"skip {hdr.size} {self.fil.tell()}")
# print(f"skip {hdr.size} {self.fil.tell()}")
self.fil.seek(hdr.size, 1)
free += hdr.size
else:
data = self.fil.read(hdr.used)
self.load_object(data, offset)
print(f"pad {hdr.size - hdr.used}")
# print(f"pad {hdr.size - hdr.used}")
self.fil.seek(hdr.size - hdr.used, 1)
cnt += 1
offset = self.fil.tell()

View File

@ -46,5 +46,18 @@ Until specified otherwise, all numbers are stored low-endian 64bit unsigned.
The store file is built up out of chunks. A chunk is either empty or houses exactly one file. If a file is updated and its size fits the chunk, it is updated in-place. If the new content does not fit the chunk, a new chunk is allocated at the end of the file. The old chunk is marked as empty.
A chunk starts with a chunk header. This is just a single field describing the size of the chunk in bytes, not including the header. The first bit of the field is the IN_USE flag. If it is not set, the contents of the chunk are ignored during loading.
A chunk starts with a chunk header. The header consists of three 8-byte fields.
The first field is the magic. Its value is 'ChNkcHnK'. The magic can be used to recover from a corrupted file.
The second field describes the size of the chunk in bytes, not including the header. The most significant bit (bit 63) of the field is the IN_USE flag. If it is not set, the contents of the chunk are ignored during loading.
The third field describes how much of the chunk is occupied by content. This is typically less than the size of the chunk because we allocate slack for each object to grow. The slack prevents frequent reallocation.
# Future work
This format is far from perfect.
* File corruption sometimes occurs. The cause of this still has to be found.
* Recovery from file corruption has not yet been implemented.
* Disk space savings are possible by eliminating slack for non-changing objects, such as waypoints, and by compressing the file.
* Indices have not been implemented, although a "member" index keeps track of which objects are in each system.