0ptr/nullptr/store.py

254 lines
6.4 KiB
Python
Raw Normal View History

2023-07-10 17:25:01 +00:00
from nullptr.models import *
2023-06-09 20:20:46 +00:00
from os.path import isfile, dirname, isdir
import os
2023-06-12 21:13:40 +00:00
from os.path import basename
2023-06-09 20:20:46 +00:00
import json
from .util import *
from time import time
2023-07-10 17:25:01 +00:00
import pickle
from struct import unpack, pack
2023-07-10 18:12:29 +00:00
from functools import partial
from io import BytesIO
2023-06-09 11:19:47 +00:00
class StorePickler(pickle.Pickler):
    """Pickler that keeps the ``Store`` itself out of the pickle stream.

    An object whose exact type is ``Store`` is written as the persistent id
    ``"STORE"`` instead of being serialized; every other object is pickled
    normally.
    """

    def persistent_id(self, obj):
        """Return ``"STORE"`` for a ``Store`` instance, ``None`` otherwise."""
        if type(obj) == Store:
            return "STORE"
        # None tells pickle to serialize the object the usual way.
        return None
class StoreUnpickler(pickle.Unpickler):
    """Unpickler that maps the persistent id ``"STORE"`` back to a store.

    The store to substitute is supplied by the caller at construction time.
    """

    def __init__(self, stream, store):
        """Remember *store* and set up unpickling from *stream*."""
        self.store = store
        super().__init__(stream)

    def persistent_load(self, pers_id):
        """Resolve a persistent id found in the pickle stream.

        Returns the store passed to the constructor when *pers_id* is
        ``"STORE"``; raises ``pickle.UnpicklingError`` for any other id.
        """
        if pers_id != "STORE":
            raise pickle.UnpicklingError("I don know the persid!")
        return self.store
2023-07-10 17:25:01 +00:00
class ChunkHeader:
    """16-byte chunk header stored as two little-endian unsigned 64-bit words.

    The top bit of the first word is the ``in_use`` flag and its lower 63
    bits are ``size``; the second word is ``used``.
    """

    def __init__(self):
        # Stream position the header was read from (filled in by parse()).
        self.offset = 0
        self.in_use = True
        self.size = 0
        self.used = 0

    @classmethod
    def parse(cls, fil):
        """Read one header from *fil* at its current position.

        Returns a new ``ChunkHeader`` whose ``offset`` is the position the
        read started at, or ``None`` when fewer than 16 bytes are available.
        """
        position = fil.tell()
        raw = fil.read(16)
        if len(raw) < 16:
            return None
        header = cls()
        header.offset = position
        word, header.used = unpack('<QQ', raw)
        # Low 63 bits carry the size, the top bit carries the in-use flag.
        header.size = word & 0x7fffffffffffffff
        header.in_use = (word & 0x8000000000000000) != 0
        return header

    def write(self, f):
        """Serialize this header to *f* at its current position."""
        flag = (1 << 63) if self.in_use else 0
        f.write(pack('<QQ', self.size | flag, self.used))

    def __repr__(self):
        return 'chunk {} {} {}'.format(self.in_use, self.size, self.used)
class Store:
    """In-memory object store backed by a single chunked data file.

    The file is a sequence of chunks.  Each chunk is a ``ChunkHeader``
    followed by ``size`` bytes, of which the first ``used`` bytes are the
    pickled object and the rest is zero padding (slack) that lets the
    object grow in place.  Chunks whose header is not ``in_use`` are skipped
    when loading.

    Objects are indexed in ``self.data`` by type and symbol, and additionally
    by the symbol of their ``system`` attribute (when they have one) in
    ``self.system_members``.
    """

    def __init__(self, data_file):
        """Open *data_file* and load every in-use chunk it contains."""
        self.init_models()
        self.fil = open_file(data_file)
        self.data = {m: {} for m in self.models}
        self.system_members = {}
        self.dirty_objects = set()
        # Minimum number of seconds between two cleanup passes.
        self.cleanup_interval = 600
        self.last_cleanup = 0
        # Extra room given to a new chunk: a fraction of the payload size,
        # clamped to [slack_min, slack_max] bytes.
        self.slack = 0.1
        self.slack_min = 64
        self.slack_max = 1024
        self.load()

    def init_models(self):
        """Build the lookup tables of model classes (subclasses of ``Base``)."""
        self.models = all_subclasses(Base)
        self.extensions = {c.ext(): c for c in self.models}
        self.model_names = {c.__name__: c for c in self.models}

    def _resolve_type(self, typ):
        """Map a model class name to its class; pass anything else through."""
        if isinstance(typ, str) and typ in self.model_names:
            return self.model_names[typ]
        return typ

    def dirty(self, obj):
        """Mark *obj* as needing to be written on the next ``flush()``."""
        self.dirty_objects.add(obj)

    def dump_object(self, obj):
        """Return *obj* pickled to bytes (the store itself is not embedded)."""
        buf = BytesIO()
        StorePickler(buf).dump(obj)
        return buf.getvalue()

    def load_object(self, data, offset):
        """Unpickle one object from *data* and register it in the store.

        *offset* is the file position of the chunk header the data came
        from; it is remembered on the object so ``store()`` can find the
        chunk again.
        """
        # NOTE: pickle executes code from the stream; only open data files
        # that come from a trusted source.
        obj = StoreUnpickler(BytesIO(data), self).load()
        obj.file_offset = offset
        obj.disable_dirty = False
        self.hold(obj)

    def load(self):
        """Read every in-use chunk from the data file into memory."""
        cnt = 0
        start_time = time()
        self.fil.seek(0)
        while (hdr := ChunkHeader.parse(self.fil)):
            if not hdr.in_use:
                # Freed chunk: skip its whole body.
                self.fil.seek(hdr.size, 1)
                continue
            data = self.fil.read(hdr.used)
            # Use the offset recorded by the header itself.  Tracking it by
            # hand went stale whenever a freed chunk was skipped, which made
            # the object point at the wrong chunk.
            self.load_object(data, hdr.offset)
            # Skip the unused slack at the end of the chunk.
            self.fil.seek(hdr.size - hdr.used, 1)
            cnt += 1
        dur = time() - start_time
        print(f'loaded {cnt} objects in {dur:.2f} seconds')

    def allocate_chunk(self, sz):
        """Append a new chunk header for *sz* payload bytes to the file.

        The chunk is sized as *sz* plus slack.  Only the header is written;
        the file position is left right after it so the caller can write the
        payload.  Returns ``(offset, header)`` where *offset* is the file
        position of the header.
        """
        used = sz
        slack = sz * self.slack
        slack = min(slack, self.slack_max)
        slack = max(slack, self.slack_min)
        sz += int(slack)
        self.fil.seek(0, 2)
        offset = self.fil.tell()
        h = ChunkHeader()
        h.size = sz
        h.used = used
        h.offset = offset
        h.write(self.fil)
        return offset, h

    def store(self, obj):
        """Write *obj* to the data file.

        The object's existing chunk is reused when the pickled data fits in
        it; otherwise that chunk is marked as free and a new chunk is
        appended at the end of the file.
        """
        data = self.dump_object(obj)
        osize = len(data)

        # Is there an existing chunk for this object?
        if obj.file_offset is not None:
            self.fil.seek(obj.file_offset)
            hdr = ChunkHeader.parse(self.fil)
            if hdr.size < osize:
                # Too small: free the chunk and force a new one below.
                hdr.in_use = False
                obj.file_offset = None
            else:
                # Big enough: only the used length changes.
                hdr.used = osize
            self.fil.seek(hdr.offset)
            hdr.write(self.fil)

        if obj.file_offset is None:
            obj.file_offset, hdr = self.allocate_chunk(osize)

        # The file position is now just past the header of the target chunk.
        self.fil.write(data)
        self.fil.write(b'\x00' * (hdr.size - hdr.used))

    def hold(self, obj):
        """Register *obj* in the in-memory indexes and attach this store."""
        obj.store = self
        self.data[type(obj)][obj.symbol] = obj
        system = getattr(obj, 'system', None)
        if system is not None:
            self.system_members.setdefault(system.symbol, set()).add(obj)

    def create(self, typ, symbol):
        """Instantiate ``typ(symbol, self)``, register it and mark it dirty."""
        obj = typ(symbol, self)
        self.hold(obj)
        self.dirty(obj)
        return obj

    def get(self, typ, symbol, create=False):
        """Look up the object of type *typ* with the given *symbol*.

        *typ* may be a model class or its class name; *symbol* is
        upper-cased before the lookup.  Returns ``None`` when the type is
        unknown, or when the object is missing and *create* is false; a
        missing object is created when *create* is true.
        """
        typ = self._resolve_type(typ)
        symbol = symbol.upper()
        if typ not in self.data:
            return None
        if symbol in self.data[typ]:
            return self.data[typ][symbol]
        if create:
            return self.create(typ, symbol)
        return None

    def getter(self, typ, create=False):
        """Return ``get`` pre-bound to *typ* and *create*."""
        return partial(self.get, self._resolve_type(typ), create=create)

    def update(self, typ, data, symbol=None):
        """Fetch or create the object for *data* and apply *data* to it.

        When *symbol* is not given it is extracted from *data* with
        ``mg(data, typ.identifier)``.  Returns the updated object.
        """
        typ = self._resolve_type(typ)
        if symbol is None:
            symbol = mg(data, typ.identifier)
        obj = self.get(typ, symbol, True)
        obj.update(data)
        return obj

    def update_list(self, typ, lst):
        """Apply ``update`` to every element of *lst*; return the objects."""
        return [self.update(typ, d) for d in lst]

    def all(self, typ):
        """Yield every held object of type *typ* (class or class name)."""
        yield from self.data[self._resolve_type(typ)].values()

    def all_members(self, system, typ=None):
        """Yield the objects registered under *system*.

        *system* may be a ``System`` or a system symbol.  When *typ* is
        given (class or class name) only objects of exactly that type are
        yielded.
        """
        typ = self._resolve_type(typ)
        if isinstance(system, System):
            system = system.symbol
        if system not in self.system_members:
            return
        for m in self.system_members[system]:
            if typ is None or type(m) is typ:
                yield m

    def cleanup(self):
        """Drop expired objects from memory, at most once per interval."""
        now = time()
        if now < self.last_cleanup + self.cleanup_interval:
            return
        # Record the run; without this the interval check never takes effect
        # and every flush rescans all objects.
        self.last_cleanup = now
        # Collect first: the index must not change while it is iterated.
        expired = [o for t in self.data for o in self.all(t) if o.is_expired()]
        for o in expired:
            # TODO (carried over from the original): presumably the object's
            # chunk in the data file should be released too - confirm.
            del self.data[type(o)][o.symbol]
            # Keep the per-system index in step with self.data so that
            # all_members() stops yielding purged objects.
            system = getattr(o, 'system', None)
            if system is not None:
                members = self.system_members.get(system.symbol)
                if members is not None:
                    members.discard(o)

    def flush(self):
        """Run cleanup, then write all dirty objects to the data file."""
        self.cleanup()
        for obj in self.dirty_objects:
            self.store(obj)
        self.fil.flush()
        self.dirty_objects = set()