Skip to content

Sparse Index

In-memory sorted mapping from block first-keys to byte offsets.

SparseIndex()

Bases: Serializable

In-memory sparse index mapping block first-keys to offsets.

Initialize an empty sparse index with no entries.

Source code in app/index/sparse.py
def __init__(self) -> None:
    """Initialize an empty sparse index with no entries."""
    self._keys: list[Key] = []
    self._offsets: list[Offset] = []

add(first_key, block_offset)

Append a new index entry. Keys must be in ascending order.

Source code in app/index/sparse.py
def add(self, first_key: Key, block_offset: Offset) -> None:
    """Append a new index entry. Keys must be in ascending order."""
    if self._keys and first_key <= self._keys[-1]:
        raise ValueError(
            f"Keys must be ascending: {first_key!r} <= {self._keys[-1]!r}"
        )
    self._keys.append(first_key)
    self._offsets.append(block_offset)

floor_offset(key)

Return the offset of the block whose first_key <= key.

Uses bisect_right: finds rightmost entry with first_key <= key.

Source code in app/index/sparse.py
def floor_offset(self, key: Key) -> Offset | None:
    """Return the offset of the block whose first_key <= *key*.

    Uses bisect_right: finds rightmost entry with first_key <= key.
    """
    idx = bisect.bisect_right(self._keys, key) - 1
    if idx < 0:
        return None
    return self._offsets[idx]

ceil_offset(key)

Return the offset of the block whose first_key >= key.

Uses bisect_left: finds leftmost entry with first_key >= key.

Source code in app/index/sparse.py
def ceil_offset(self, key: Key) -> Offset | None:
    """Return the offset of the block whose first_key >= *key*.

    Uses bisect_left: finds leftmost entry with first_key >= key.
    """
    idx = bisect.bisect_left(self._keys, key)
    if idx >= len(self._keys):
        return None
    return self._offsets[idx]

to_bytes()

Serialize all index entries to bytes with CRC footer.

Source code in app/index/sparse.py
def to_bytes(self) -> bytes:
    """Serialize all index entries to bytes with CRC footer."""
    parts: list[bytes] = []
    for key, offset in zip(self._keys, self._offsets, strict=True):
        parts.append(encode_index_entry(key, offset))
    payload = b"".join(parts)
    return payload + crc.pack(crc.compute(payload))

from_bytes(data) classmethod

Deserialize from data. Verifies CRC integrity.

Source code in app/index/sparse.py
@classmethod
def from_bytes(cls, data: bytes) -> SparseIndex:
    """Deserialize from *data*. Verifies CRC integrity."""
    if len(data) < crc.CRC_SIZE:
        # Empty index (no entries, no CRC) is valid
        if len(data) == 0:
            return cls()
        raise CorruptRecordError(
            f"Index data too short for CRC: {len(data)}"
        )

    if len(data) == 0:
        return cls()

    payload = data[: -crc.CRC_SIZE]
    stored_crc = crc.unpack(data, len(data) - crc.CRC_SIZE)
    if not crc.verify(payload, stored_crc):
        raise CorruptRecordError("Index CRC mismatch")

    obj = cls()
    for key, offset in decode_index_entries(payload):
        obj._keys.append(key)
        obj._offsets.append(offset)
    return obj

next_offset_after(offset)

Return the first offset strictly greater than offset, or None.

Source code in app/index/sparse.py
def next_offset_after(self, offset: Offset) -> Offset | None:
    """Return the first offset strictly greater than *offset*, or None."""
    idx = bisect.bisect_right(self._offsets, offset)
    if idx < len(self._offsets):
        return self._offsets[idx]
    return None

__len__()

Return the number of index entries.

Returns:

Type Description
int

The count of (first_key, block_offset) pairs stored in

int

this index.

Source code in app/index/sparse.py
def __len__(self) -> int:
    """Return the number of index entries.

    Returns:
        The count of ``(first_key, block_offset)`` pairs stored in
        this index.
    """
    return len(self._keys)