Skip to content

VectorIndex

High-level interface to Chassis vector index.

This class provides a Pythonic wrapper around the Chassis FFI layer, handling memory management, error handling, and type conversions.

Thread Safety
  • add() and flush() require exclusive access (single writer)
  • search(), len(), is_empty(), dimensions() allow concurrent access (multi reader)
Example

index = VectorIndex("vectors.chassis", dimensions=128)

Add vectors

vectors = np.random.rand(1000, 128).astype(np.float32)
for vec in vectors:
    index.add(vec)
index.flush()

query = np.random.rand(128).astype(np.float32)
results = index.search(query, k=10)
for result in results:
    print(f"ID: {result.id}, Distance: {result.distance}")

Source code in chassis/index.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
class VectorIndex:
    """High-level interface to Chassis vector index.

    This class provides a Pythonic wrapper around the Chassis FFI layer,
    handling memory management, error handling, and type conversions.

    Lifecycle:
        The index owns a native handle. Release it with close() or by using
        the index as a context manager; __del__ also calls close() as a
        fallback when the object is garbage collected.

    Thread Safety:
        - add() and flush() require exclusive access (single writer)
        - search(), len(), is_empty(), dimensions() allow concurrent
            access (multi reader)

    Example:
        >>> index = VectorIndex("vectors.chassis", dimensions=128)
        >>>
        >>> # Add vectors
        >>> vectors = np.random.rand(1000, 128).astype(np.float32)
        >>> for vec in vectors:
        ...     index.add(vec)
        >>> index.flush()
        >>>
        >>> # Search
        >>> query = np.random.rand(128).astype(np.float32)
        >>> results = index.search(query, k=10)
        >>> for result in results:
        ...     print(f"ID: {result.id}, Distance: {result.distance}")
    """

    def __init__(
        self,
        path: Union[str, Path],
        dimensions: int,
        options: Optional["IndexOptions"] = None,
    ):
        """Open or create a vector index.

        Args:
            path: Path to the index file
            dimensions: Number of dimensions per vector
            options: Optional HNSW configuration. If None, uses defaults.

        Raises:
            DimensionMismatchError: If the native error message mentions
                a dimension problem
            InvalidPathError: If path is invalid or inaccessible
            NullPointerError: If index creation fails without an error
                message
            ChassisError: For other errors
        """
        # Lifecycle state is assigned first: close() (and therefore
        # __del__) may run on a partially constructed object if anything
        # below raises.
        self._ptr: Optional[_ffi.ChassisIndexPtr] = None
        self._closed = False

        self._path = Path(path)
        self._dimensions = dimensions
        # Test against None instead of truthiness so the options stored
        # here are always the ones forwarded to the native layer below.
        self._options = options if options is not None else IndexOptions()
        self._options.validate()

        # The native API takes the path as UTF-8 encoded bytes.
        path_bytes = str(self._path).encode("utf-8")

        if options is None:
            # No explicit configuration: use the default entry point.
            ptr = _ffi._lib.chassis_open(path_bytes, dimensions)
        else:
            ptr = _ffi._lib.chassis_open_with_options(
                path_bytes,
                dimensions,
                self._options.max_connections,
                self._options.ef_construction,
                self._options.ef_search,
            )

        if not ptr:
            error_msg = _ffi.get_last_error()
            if not error_msg:
                raise NullPointerError(
                    "Failed to open index (no error message)"
                )
            # The error category is derived from the message text.
            lowered = error_msg.lower()
            if "dimension" in lowered:
                raise DimensionMismatchError(error_msg)
            if "path" in lowered or "utf-8" in lowered:
                raise InvalidPathError(error_msg)
            raise ChassisError(error_msg)

        self._ptr = ptr

    def __del__(self):
        """Clean up resources when index is garbage collected."""
        self.close()

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; closes the index, never suppresses."""
        self.close()
        return False

    def close(self) -> None:
        """Close the index and free resources.

        This is called automatically when the object is garbage collected
        or when used as a context manager. It's safe to call multiple
        times, including on an instance whose construction failed.
        """
        # getattr: __del__ can reach this on an object whose __init__
        # never ran far enough to assign the attribute.
        ptr = getattr(self, "_ptr", None)
        # Drop the handle and mark the index closed *before* freeing so a
        # failure inside the native call can never lead to a second free.
        self._ptr = None
        self._closed = True
        if ptr:
            _ffi._lib.chassis_free(ptr)

    def _check_closed(self) -> None:
        """Check if index is closed and raise error if so."""
        if self._closed or not self._ptr:
            raise ChassisError("Index is closed")

    def add(
        self, vector: Union[Sequence[float], npt.NDArray[np.float32]]
    ) -> int:
        """Add a vector to the index.

        Args:
            vector: Vector to add (must match index dimensions).
                Can be a list, tuple, numpy array, or any sequence of
                floats, but must be one-dimensional.

        Returns:
            Vector ID (0-based, sequential)

        Raises:
            ChassisError: If index is closed
            DimensionMismatchError: If the vector is not one-dimensional
                or its length doesn't match the index dimensions
            ChassisError: For other errors

        Note:
            This method does NOT guarantee durability. Call flush() to
            ensure data is written to disk.

        Thread Safety:
            Single-writer only. Do not call concurrently with other add()
            or flush() calls.
        """
        self._check_closed()

        # Convert sequences and non-float32 arrays to a float32 ndarray.
        vector = np.asarray(vector, dtype=np.float32)

        # Only a flat vector may reach the FFI: the native call receives
        # a raw pointer plus a length, so a 2-D array whose first axis
        # happens to match would silently pass the wrong memory.
        if vector.ndim != 1:
            raise DimensionMismatchError(
                f"Vector must be 1-dimensional, got shape {vector.shape}"
            )
        if vector.shape[0] != self._dimensions:
            raise DimensionMismatchError(
                f"Vector has {vector.shape[0]} dimensions, "
                f"but index expects {self._dimensions}"
            )

        # Strided views (e.g. arr[::2]) must be packed before their
        # address is handed to C.
        if not vector.flags.c_contiguous:
            vector = np.ascontiguousarray(vector)

        vector_ptr = vector.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        vector_id = _ffi._lib.chassis_add(
            self._ptr, vector_ptr, vector.shape[0]
        )

        # UINT64_MAX is the error sentinel returned by the native call.
        if vector_id == 2**64 - 1:
            error_msg = _ffi.get_last_error()
            if not error_msg:
                raise ChassisError("Failed to add vector")
            if "dimension" in error_msg.lower():
                raise DimensionMismatchError(error_msg)
            raise ChassisError(error_msg)

        return int(vector_id)

    def search(
        self,
        query: Union[Sequence[float], npt.NDArray[np.float32]],
        k: int = 10,
    ) -> List["SearchResult"]:
        """Search for k nearest neighbors.

        Args:
            query: Query vector (must be one-dimensional and match index
                dimensions)
            k: Number of nearest neighbors to return (default: 10)

        Returns:
            List of SearchResult objects, sorted by distance (ascending)

        Raises:
            ChassisError: If index is closed
            DimensionMismatchError: If the query is not one-dimensional
                or its length doesn't match the index dimensions
            ValueError: If k < 1
            ChassisError: For other errors

        Thread Safety:
            Multi-reader safe. Can be called concurrently with other
            search() calls, but not with add() or flush().
        """
        self._check_closed()

        if k < 1:
            raise ValueError(f"k must be >= 1, got {k}")

        # Convert sequences and non-float32 arrays to a float32 ndarray.
        query = np.asarray(query, dtype=np.float32)

        # Reject anything that is not a flat vector before taking its
        # address; len() alone would accept a 2-D array whose first axis
        # matches.
        if query.ndim != 1:
            raise DimensionMismatchError(
                f"Query must be 1-dimensional, got shape {query.shape}"
            )
        if query.shape[0] != self._dimensions:
            raise DimensionMismatchError(
                f"Query has {query.shape[0]} dimensions, "
                f"but index expects {self._dimensions}"
            )

        if not query.flags.c_contiguous:
            query = np.ascontiguousarray(query)

        # Output buffers filled in by the native call (at most k entries).
        out_ids = np.zeros(k, dtype=np.uint64)
        out_dists = np.zeros(k, dtype=np.float32)

        query_ptr = query.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
        ids_ptr = out_ids.ctypes.data_as(ctypes.POINTER(ctypes.c_uint64))
        dists_ptr = out_dists.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

        count = _ffi._lib.chassis_search(
            self._ptr,
            query_ptr,
            query.shape[0],
            k,
            ids_ptr,
            dists_ptr,
        )

        # A zero count is ambiguous: either an error or simply no hits.
        # NOTE(review): this relies on get_last_error() not returning a
        # stale message from an earlier failed call - confirm in _ffi.
        if count == 0:
            error_msg = _ffi.get_last_error()
            if error_msg:
                if "dimension" in error_msg.lower():
                    raise DimensionMismatchError(error_msg)
                raise ChassisError(error_msg)
            # Otherwise: empty index or no neighbors found.

        # Only the first `count` buffer slots hold results.
        return [
            SearchResult(id=int(out_ids[i]), distance=float(out_dists[i]))
            for i in range(count)
        ]

    def flush(self) -> None:
        """Flush all changes to disk.

        This method ensures durability by writing all pending changes to
        disk. It's expensive (1-50ms) so batch multiple add() calls before
        flushing.

        Raises:
            ChassisError: If the index is closed or the flush fails

        Thread Safety:
            Single-writer only. Do not call concurrently with add() or other
            flush() calls.
        """
        self._check_closed()

        result = _ffi._lib.chassis_flush(self._ptr)

        # Any non-zero status from the native call is a failure.
        if result != 0:
            error_msg = _ffi.get_last_error()
            raise ChassisError(f"Flush failed: {error_msg or 'unknown error'}")

    def __len__(self) -> int:
        """Get the number of vectors in the index.

        Returns:
            Number of vectors

        Raises:
            ChassisError: If index is closed

        Thread Safety:
            Multi-reader safe.
        """
        self._check_closed()
        return int(_ffi._lib.chassis_len(self._ptr))

    def is_empty(self) -> bool:
        """Check if the index is empty.

        Returns:
            True if empty, False otherwise

        Raises:
            ChassisError: If index is closed

        Thread Safety:
            Multi-reader safe.
        """
        self._check_closed()
        return bool(_ffi._lib.chassis_is_empty(self._ptr))

    @property
    def dimensions(self) -> int:
        """Get the dimensionality of vectors in this index.

        The value is read from the native index, so the index must be open.

        Returns:
            Number of dimensions

        Raises:
            ChassisError: If index is closed

        Thread Safety:
            Multi-reader safe.
        """
        self._check_closed()
        return int(_ffi._lib.chassis_dimensions(self._ptr))

    @property
    def path(self) -> Path:
        """Get the path to the index file."""
        return self._path

    @property
    def options(self) -> "IndexOptions":
        """Get the index configuration options."""
        return self._options

    def __repr__(self) -> str:
        """Return a debug representation; never touches a closed handle."""
        status = "closed" if self._closed else "open"
        # len() needs a live native handle; show a placeholder otherwise.
        size = "?" if self._closed or not self._ptr else len(self)
        return (
            f"VectorIndex(path={self._path}, "
            f"dimensions={self._dimensions}, "
            f"len={size}, "
            f"status={status})"
        )

dimensions property

Get the dimensionality of vectors in this index.

Returns:

Type Description
int

Number of dimensions

Thread Safety

Multi-reader safe.

path property

Get the path to the index file.

options property

Get the index configuration options.

__init__(path, dimensions, options=None)

Open or create a vector index.

Parameters:

Name Type Description Default
path Union[str, Path]

Path to the index file

required
dimensions int

Number of dimensions per vector

required
options Optional[IndexOptions]

Optional HNSW configuration. If None, uses defaults.

None

Raises:

Type Description
InvalidPathError

If path is invalid or inaccessible

NullPointerError

If index creation fails

ChassisError

For other errors

Source code in chassis/index.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def __init__(
    self,
    path: Union[str, Path],
    dimensions: int,
    options: Optional["IndexOptions"] = None,
):
    """Open or create a vector index.

    Args:
        path: Path to the index file
        dimensions: Number of dimensions per vector
        options: Optional HNSW configuration. If None, uses defaults.

    Raises:
        DimensionMismatchError: If the native error message mentions a
            dimension problem
        InvalidPathError: If path is invalid or inaccessible
        NullPointerError: If index creation fails without an error message
        ChassisError: For other errors
    """
    # Lifecycle state is assigned first: close() (and therefore __del__)
    # may run on a partially constructed object if anything below raises.
    self._ptr: Optional[_ffi.ChassisIndexPtr] = None
    self._closed = False

    self._path = Path(path)
    self._dimensions = dimensions
    # Test against None instead of truthiness so the options stored here
    # are always the ones forwarded to the native layer below.
    self._options = options if options is not None else IndexOptions()
    self._options.validate()

    # The native API takes the path as UTF-8 encoded bytes.
    path_bytes = str(self._path).encode("utf-8")

    if options is None:
        # No explicit configuration: use the default entry point.
        ptr = _ffi._lib.chassis_open(path_bytes, dimensions)
    else:
        ptr = _ffi._lib.chassis_open_with_options(
            path_bytes,
            dimensions,
            self._options.max_connections,
            self._options.ef_construction,
            self._options.ef_search,
        )

    if not ptr:
        error_msg = _ffi.get_last_error()
        if not error_msg:
            raise NullPointerError(
                "Failed to open index (no error message)"
            )
        # The error category is derived from the message text.
        lowered = error_msg.lower()
        if "dimension" in lowered:
            raise DimensionMismatchError(error_msg)
        if "path" in lowered or "utf-8" in lowered:
            raise InvalidPathError(error_msg)
        raise ChassisError(error_msg)

    self._ptr = ptr

add(vector)

Add a vector to the index.

Parameters:

Name Type Description Default
vector Union[Sequence[float], NDArray[float32]]

Vector to add (must match index dimensions). Can be a list, tuple, NumPy array, or any sequence of floats.

required

Returns:

Type Description
int

Vector ID (0-based, sequential)

Raises:

Type Description
ChassisError

If index is closed

DimensionMismatchError

If vector dimensions don't match

ChassisError

For other errors

Note

This method does NOT guarantee durability. Call flush() to ensure data is written to disk.

Thread Safety

Single-writer only. Do not call concurrently with other add() or flush() calls.

Source code in chassis/index.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def add(
    self, vector: Union[Sequence[float], npt.NDArray[np.float32]]
) -> int:
    """Add a vector to the index.

    Args:
        vector: Vector to add (must match index dimensions).
            Can be a list, tuple, numpy array, or any sequence of floats,
            but must be one-dimensional.

    Returns:
        Vector ID (0-based, sequential)

    Raises:
        ChassisError: If index is closed
        DimensionMismatchError: If the vector is not one-dimensional or
            its length doesn't match the index dimensions
        ChassisError: For other errors

    Note:
        This method does NOT guarantee durability. Call flush() to
        ensure data is written to disk.

    Thread Safety:
        Single-writer only. Do not call concurrently with other add()
        or flush() calls.
    """
    self._check_closed()

    # Convert sequences and non-float32 arrays to a float32 ndarray.
    vector = np.asarray(vector, dtype=np.float32)

    # Only a flat vector may reach the FFI: the native call receives a raw
    # pointer plus a length, so a 2-D array whose first axis happens to
    # match would silently pass the wrong memory.
    if vector.ndim != 1:
        raise DimensionMismatchError(
            f"Vector must be 1-dimensional, got shape {vector.shape}"
        )
    if vector.shape[0] != self._dimensions:
        raise DimensionMismatchError(
            f"Vector has {vector.shape[0]} dimensions, "
            f"but index expects {self._dimensions}"
        )

    # Strided views (e.g. arr[::2]) must be packed before their address
    # is handed to C.
    if not vector.flags.c_contiguous:
        vector = np.ascontiguousarray(vector)

    vector_ptr = vector.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
    vector_id = _ffi._lib.chassis_add(self._ptr, vector_ptr, vector.shape[0])

    # UINT64_MAX is the error sentinel returned by the native call.
    if vector_id == 2**64 - 1:
        error_msg = _ffi.get_last_error()
        if not error_msg:
            raise ChassisError("Failed to add vector")
        if "dimension" in error_msg.lower():
            raise DimensionMismatchError(error_msg)
        raise ChassisError(error_msg)

    return int(vector_id)

search(query, k=10)

Search for k nearest neighbors.

Parameters:

Name Type Description Default
query Union[Sequence[float], NDArray[float32]]

Query vector (must match index dimensions)

required
k int

Number of nearest neighbors to return (default: 10)

10

Returns:

Type Description
List[SearchResult]

List of SearchResult objects, sorted by distance (ascending)

Raises:

Type Description
ChassisError

If index is closed

DimensionMismatchError

If query dimensions don't match

ValueError

If k < 1

ChassisError

For other errors

Thread Safety

Multi-reader safe. Can be called concurrently with other search() calls, but not with add() or flush().

Source code in chassis/index.py
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
def search(
    self,
    query: Union[Sequence[float], npt.NDArray[np.float32]],
    k: int = 10,
) -> List["SearchResult"]:
    """Search for k nearest neighbors.

    Args:
        query: Query vector (must be one-dimensional and match index
            dimensions)
        k: Number of nearest neighbors to return (default: 10)

    Returns:
        List of SearchResult objects, sorted by distance (ascending)

    Raises:
        ChassisError: If index is closed
        DimensionMismatchError: If the query is not one-dimensional or
            its length doesn't match the index dimensions
        ValueError: If k < 1
        ChassisError: For other errors

    Thread Safety:
        Multi-reader safe. Can be called concurrently with other search()
        calls, but not with add() or flush().
    """
    self._check_closed()

    if k < 1:
        raise ValueError(f"k must be >= 1, got {k}")

    # Convert sequences and non-float32 arrays to a float32 ndarray.
    query = np.asarray(query, dtype=np.float32)

    # Reject anything that is not a flat vector before taking its address;
    # len() alone would accept a 2-D array whose first axis matches.
    if query.ndim != 1:
        raise DimensionMismatchError(
            f"Query must be 1-dimensional, got shape {query.shape}"
        )
    if query.shape[0] != self._dimensions:
        raise DimensionMismatchError(
            f"Query has {query.shape[0]} dimensions, "
            f"but index expects {self._dimensions}"
        )

    if not query.flags.c_contiguous:
        query = np.ascontiguousarray(query)

    # Output buffers filled in by the native call (at most k entries).
    out_ids = np.zeros(k, dtype=np.uint64)
    out_dists = np.zeros(k, dtype=np.float32)

    query_ptr = query.ctypes.data_as(ctypes.POINTER(ctypes.c_float))
    ids_ptr = out_ids.ctypes.data_as(ctypes.POINTER(ctypes.c_uint64))
    dists_ptr = out_dists.ctypes.data_as(ctypes.POINTER(ctypes.c_float))

    count = _ffi._lib.chassis_search(
        self._ptr,
        query_ptr,
        query.shape[0],
        k,
        ids_ptr,
        dists_ptr,
    )

    # A zero count is ambiguous: either an error or simply no hits.
    # NOTE(review): this relies on get_last_error() not returning a stale
    # message from an earlier failed call - confirm in _ffi.
    if count == 0:
        error_msg = _ffi.get_last_error()
        if error_msg:
            if "dimension" in error_msg.lower():
                raise DimensionMismatchError(error_msg)
            raise ChassisError(error_msg)
        # Otherwise: empty index or no neighbors found.

    # Only the first `count` buffer slots hold results.
    return [
        SearchResult(id=int(out_ids[i]), distance=float(out_dists[i]))
        for i in range(count)
    ]

flush()

Flush all changes to disk.

This method ensures durability by writing all pending changes to disk. It's expensive (1-50ms) so batch multiple add() calls before flushing.

Raises:

Type Description
ChassisError

If flush fails

Thread Safety

Single-writer only. Do not call concurrently with add() or other flush() calls.

Source code in chassis/index.py
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
def flush(self) -> None:
    """Persist all pending changes to disk.

    Flushing is what makes previously added vectors durable. It is
    expensive (1-50ms), so batch multiple add() calls before flushing.

    Raises:
        ChassisError: If flush fails

    Thread Safety:
        Single-writer only. Do not call concurrently with add() or other
        flush() calls.
    """
    self._check_closed()

    status = _ffi._lib.chassis_flush(self._ptr)
    if status == 0:
        return

    # Non-zero status: report the native error text when there is one.
    detail = _ffi.get_last_error() or 'unknown error'
    raise ChassisError(f"Flush failed: {detail}")

close()

Close the index and free resources.

This is called automatically when the object is garbage collected or when used as a context manager. It's safe to call multiple times.

Source code in chassis/index.py
171
172
173
174
175
176
177
178
179
180
def close(self) -> None:
    """Close the index and free resources.

    This is called automatically when the object is garbage collected
    or when used as a context manager. It's safe to call multiple times,
    including on an instance whose construction failed.
    """
    # getattr: __del__ can reach this on an object whose __init__ never
    # ran far enough to assign the attribute.
    ptr = getattr(self, "_ptr", None)
    # Drop the handle and mark the index closed *before* freeing so a
    # failure inside the native call can never lead to a second free.
    self._ptr = None
    self._closed = True
    if ptr:
        _ffi._lib.chassis_free(ptr)

is_empty()

Check if the index is empty.

Returns:

Type Description
bool

True if empty, False otherwise

Thread Safety

Multi-reader safe.

Source code in chassis/index.py
363
364
365
366
367
368
369
370
371
372
373
def is_empty(self) -> bool:
    """Report whether the index currently holds no vectors.

    Returns:
        True if empty, False otherwise

    Thread Safety:
        Multi-reader safe.
    """
    self._check_closed()
    # Coerce the native return value to a Python bool.
    empty_flag = _ffi._lib.chassis_is_empty(self._ptr)
    return bool(empty_flag)