From ac357d9ef23a07551dc6f5b06bf23f9b165c2b35 Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Tue, 10 May 2011 13:49:14 +0100 Subject: [PATCH] libcastle from acunutils.hg:d724e196c846 --- Makefile | 28 ++ castle.h | 420 +++++++++++++++++++ castle_convenience.c | 725 ++++++++++++++++++++++++++++++++ castle_front.c | 641 ++++++++++++++++++++++++++++ castle_ioctl.c | 169 ++++++++ castle_print.c | 139 ++++++ castle_private.h | 52 +++ castle_public.h | 588 ++++++++++++++++++++++++++ castle_utils.c | 262 ++++++++++++ list.h | 979 +++++++++++++++++++++++++++++++++++++++++++ ring.h | 343 +++++++++++++++ versions | 78 ++++ 12 files changed, 4424 insertions(+) create mode 100644 Makefile create mode 100644 castle.h create mode 100644 castle_convenience.c create mode 100644 castle_front.c create mode 100644 castle_ioctl.c create mode 100644 castle_print.c create mode 100644 castle_private.h create mode 100644 castle_public.h create mode 100644 castle_utils.c create mode 100644 list.h create mode 100644 ring.h create mode 100644 versions diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4aeec99 --- /dev/null +++ b/Makefile @@ -0,0 +1,28 @@ +CFLAGS=-fPIC -DPIC -std=gnu99 -ggdb -Wmissing-prototypes -Wmissing-declarations -Wstrict-prototypes -Wall -Wextra -Wshadow -Werror -O2 +LIB_DESTDIR=$(DESTDIR)/usr/lib64 +INC_DESTDIR=$(DESTDIR)/usr/include/castle + +SONAME=libcastle.so.1 + +all: $(SONAME) + +%.o: %.c *.h + gcc -pthread -c -o $@ $< $(CFLAGS) + +$(SONAME): castle_front.o castle_ioctl.o castle_convenience.o castle_print.o castle_utils.o + gcc -pthread -shared -Wl,-Bsymbolic -Wl,-soname,$(SONAME) -Wl,--warn-common -Wl,--fatal-warnings -Wl,--version-script=versions -o $@ $^ $(CFLAGS) + +install: $(SONAME) + mkdir -p $(LIB_DESTDIR) + install $(SONAME) $(LIB_DESTDIR) + ln -sf $(SONAME) $(LIB_DESTDIR)/libcastle.so + if [ -z "$(DONT_RUN_LDCONFIG)" ]; then \ + ldconfig; \ + fi + + mkdir -p $(INC_DESTDIR) + install castle_public.h $(INC_DESTDIR) + install castle.h $(INC_DESTDIR) + +clean: + rm -rf *.o *.so* diff --git a/castle.h b/castle.h new file mode 100644 index 0000000..7c09dd5 --- /dev/null +++ b/castle.h @@ -0,0 +1,420 @@ +#include +#include + +#include "castle_public.h" + +#ifndef __CASTLE_FRONT_H__ +#define __CASTLE_FRONT_H__ + +#define CASTLE_NODE "/dev/castle-fs/control" + +#ifdef __GNUC_STDC_INLINE__ +#define ONLY_INLINE extern __inline__ __attribute__((__gnu_inline__)) +#else +#define ONLY_INLINE extern __inline__ +#endif + +//#define TRACE +#ifdef TRACE +extern volatile unsigned long ops_counter; +extern volatile unsigned long selects_counter; +extern volatile unsigned long ioctls_counter; +#endif + +/* deref the char buffer from a castle_buffer* */ +#define CASTLE_BUF_GET(x) ((x)?(*(char**)(x)):(char*)NULL) + +struct castle_front_connection; + +struct s_castle_buffer +{ + /* buf must be first member for CASTLE_BUF_GET macro */ + char* buf; + size_t buflen; +}; + +/* Type names are all over the place - create a consistent set of typedefs for the public API */ +typedef struct castle_front_connection castle_connection; +typedef struct s_castle_buffer castle_buffer; +typedef c_vl_okey_t castle_key; +typedef c_vl_key_t castle_key_part; +typedef castle_request_t castle_request; +typedef castle_response_t castle_response; +typedef castle_interface_token_t castle_token; +typedef collection_id_t castle_collection; +typedef slave_uuid_t castle_slave_uuid; +typedef version_t castle_version; +typedef c_env_var_t castle_env_var_id; + +typedef void (*castle_callback) (castle_connection 
*connection, + castle_response *response, void *userdata); + +struct castle_blocking_call +{ + int completed; + int err; + uint64_t length; + castle_token token; +}; + +int castle_shared_buffer_create (castle_connection *conn, + char **buffer, + unsigned long size) __attribute__((warn_unused_result)); +int castle_shared_buffer_destroy (castle_connection *conn, + char *buffer, + unsigned long size); +int castle_shared_buffer_allocate (castle_connection *conn, + castle_buffer **buffer_out, unsigned long size) __attribute__((warn_unused_result)); +int castle_shared_buffer_release (castle_connection *conn, castle_buffer* buffer); +int castle_connect (castle_connection **conn) __attribute__((warn_unused_result)); +void castle_disconnect (castle_connection *conn); +void castle_free (castle_connection *conn); +void castle_request_send (castle_connection *conn, + castle_request *req, + castle_callback *callbacks, + void **userdatas, + int reqs_count); +int castle_request_do_blocking (castle_connection *conn, + castle_request *req, + struct castle_blocking_call *blocking_call); +int castle_request_do_blocking_multi(castle_connection *conn, + castle_request *req, + struct castle_blocking_call *blocking_call, + int count); + +int castle_print_key(FILE *f, castle_key *key); +int castle_print_request(FILE *f, castle_request *req, int print_values); +int castle_print_response(FILE *f, castle_response *resp, int print_values); + +extern void castle_replace_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, char *value, uint32_t value_len) __attribute__((always_inline)); +ONLY_INLINE void castle_replace_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, char *value, uint32_t value_len) { + req->tag = CASTLE_RING_REPLACE; + req->replace.collection_id = collection; + req->replace.key_ptr = key; + req->replace.key_len = key_len; + req->replace.value_ptr = value; + req->replace.value_len = value_len; +} + +extern void castle_remove_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len) __attribute__((always_inline)); +ONLY_INLINE void castle_remove_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len) { + req->tag = CASTLE_RING_REMOVE; + req->replace.collection_id = collection; + req->replace.key_ptr = key; + req->replace.key_len = key_len; +} + +extern void castle_get_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, char *buffer, uint32_t buffer_len) __attribute__((always_inline)); +ONLY_INLINE void castle_get_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, char *buffer, uint32_t buffer_len) { + req->tag = CASTLE_RING_GET; + req->get.collection_id = collection; + req->get.key_ptr = key; + req->get.key_len = key_len; + req->get.value_ptr = buffer; + req->get.value_len = buffer_len; +} + +extern void castle_iter_start_prepare(castle_request *req, castle_collection collection, castle_key *start_key, uint32_t start_key_len, castle_key *end_key, uint32_t end_key_len, uint64_t flags) __attribute__((always_inline)); +ONLY_INLINE void castle_iter_start_prepare(castle_request *req, castle_collection collection, castle_key *start_key, uint32_t start_key_len, castle_key *end_key, uint32_t end_key_len, uint64_t flags) { + req->tag = CASTLE_RING_ITER_START; + req->iter_start.collection_id = collection; + req->iter_start.start_key_ptr = start_key; + 
req->iter_start.start_key_len = start_key_len; + req->iter_start.end_key_ptr = end_key; + req->iter_start.end_key_len = end_key_len; + req->iter_start.flags = flags; +} + +extern void castle_iter_next_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) __attribute__((always_inline)); +ONLY_INLINE void castle_iter_next_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) { + req->tag = CASTLE_RING_ITER_NEXT; + req->iter_next.token = token; + req->iter_next.buffer_ptr = buffer; + req->iter_next.buffer_len = buffer_len; +} + +extern void castle_iter_finish_prepare(castle_request *req, castle_token token) __attribute__((always_inline)); +ONLY_INLINE void castle_iter_finish_prepare(castle_request *req, castle_token token) { + req->tag = CASTLE_RING_ITER_FINISH; + req->iter_finish.token = token; +} + +extern void castle_big_put_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, uint64_t value_len) __attribute__((always_inline)); +ONLY_INLINE void castle_big_put_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len, uint64_t value_len) { + req->tag = CASTLE_RING_BIG_PUT; + req->big_put.collection_id = collection; + req->big_put.key_ptr = key; + req->big_put.key_len = key_len; + req->big_put.value_len = value_len; +} + +extern void castle_put_chunk_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) __attribute__((always_inline)); +ONLY_INLINE void castle_put_chunk_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) { + req->tag = CASTLE_RING_PUT_CHUNK; + req->put_chunk.token = token; + req->put_chunk.buffer_ptr = buffer; + req->put_chunk.buffer_len = buffer_len; +} + +extern void castle_big_get_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len) __attribute__((always_inline)); +ONLY_INLINE void castle_big_get_prepare(castle_request *req, castle_collection collection, castle_key *key, uint32_t key_len) { + req->tag = CASTLE_RING_BIG_GET; + req->big_get.collection_id = collection; + req->big_get.key_ptr = key; + req->big_get.key_len = key_len; +} + +extern void castle_get_chunk_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) __attribute__((always_inline)); +ONLY_INLINE void castle_get_chunk_prepare(castle_request *req, castle_token token, char *buffer, uint32_t buffer_len) { + req->tag = CASTLE_RING_GET_CHUNK; + req->get_chunk.token = token; + req->get_chunk.buffer_ptr = buffer; + req->get_chunk.buffer_len = buffer_len; +} + +/* Assembles a castle_key at the given location, where buf_len is the + * number of bytes allocated, and dims/key_lens/keys are the + * parameters of the key. Returns zero on success, or the number of + * bytes needed to build the key if it won't fit in buf_len bytes. + * + * When invoked with key == NULL, buf_len == 0, or keys == NULL, only + * returns the necessary size + * + * When invoked with key_lens == NULL, uses strlen() to compute the lengths + */ +uint32_t castle_build_key(castle_key *key, size_t buf_len, int dims, const int *key_lens, const uint8_t * const*keys); +/* Variation on the theme, always returns the number of bytes needed. Success is when that value is <= buf_len. 
Mostly just here because java-castle wants it */ +uint32_t castle_build_key_len(castle_key *key, size_t buf_len, int dims, const int *key_lens, const uint8_t * const*keys); +/* Returns the number of bytes needed for a key with these parameters */ +uint32_t castle_key_bytes_needed(int dims, const int *key_lens, const uint8_t * const*keys) __attribute__((pure)); + +/* Convenience functions - some of these incur copies */ + +/* Call as castle_alloca_key("foo") or castle_alloca_key("foo", "bar"). Likely to compile down to nothing. Only really useful when calling the convenience functions */ +#define castle_alloca_key(...) ({ \ + const char *ks[] = { __VA_ARGS__ }; \ + uint32_t nr_dims = sizeof(ks) / sizeof(const char *); \ + castle_key *okey = alloca(sizeof(*okey) + sizeof(okey->dims[0]) * nr_dims); \ + int i; \ + okey->nr_dims = nr_dims; \ + for (i = 0; i < nr_dims; i++) { \ + size_t len = strlen(ks[i]); \ + castle_key_part *key = alloca(sizeof(*key) + len); \ + memcpy(key->key, ks[i], len); \ + key->length = len; \ + okey->dims[i] = key; \ + } \ + okey; }) + +castle_key *castle_malloc_key(int dims, const int *key_lens, const uint8_t * const*keys) __attribute__((malloc)); + +extern uint32_t castle_key_dims(const castle_key *key) __attribute__((always_inline)); +ONLY_INLINE uint32_t castle_key_dims(const castle_key *key) { + return key->nr_dims; +} + +extern uint32_t castle_key_elem_len(const castle_key *key, int elem) __attribute__((always_inline)); +ONLY_INLINE uint32_t castle_key_elem_len(const castle_key *key, int elem) { + return key->dims[elem]->length; +} + +extern const uint8_t *castle_key_elem_data(const castle_key *key, int elem) __attribute__((always_inline)); +ONLY_INLINE const uint8_t *castle_key_elem_data(const castle_key *key, int elem) { + return key->dims[elem]->key; +} + +int castle_get (castle_connection *conn, + castle_collection collection, + castle_key *key, + char **value_out, uint32_t *value_len_out) __attribute__((warn_unused_result)); +int castle_replace (castle_connection *conn, + castle_collection collection, + castle_key *key, + char *val, uint32_t val_len); +int castle_remove (castle_connection *conn, + castle_collection collection, + castle_key *key); +int castle_iter_start (castle_connection *conn, + castle_collection collection, + castle_key *start_key, + castle_key *end_key, + castle_token *token_out) __attribute__((warn_unused_result)); +int castle_iter_next (castle_connection *conn, + castle_token token, + struct castle_key_value_list **kvs, + uint32_t buf_size) __attribute__((warn_unused_result)); +int castle_iter_finish (castle_connection *conn, + castle_token token); +int castle_getslice (castle_connection *conn, + castle_collection collection, + castle_key *start_key, + castle_key *end_key, + struct castle_key_value_list **kvs_out, + uint32_t limit) __attribute__((warn_unused_result)); +void castle_kvs_free (struct castle_key_value_list *kvs_out); +int castle_big_put (castle_connection *conn, + castle_collection collection, + castle_key *key, + uint64_t val_length, + castle_token *token_out); +int castle_put_chunk (castle_connection *conn, + castle_token token, + char *value, uint32_t value_len); +int castle_big_get (castle_connection *conn, + castle_collection collection, + castle_key *key, + castle_token *token_out, uint64_t *value_len_out) __attribute__((warn_unused_result)); +int castle_get_chunk (castle_connection *conn, + castle_token token, + char **value_out, uint32_t *value_len_out) __attribute__((warn_unused_result)); + +/* Control functions - 
ioctls */ + +#define C_TYPE_uint32 uint32_t +#define C_TYPE_uint64 uint64_t +#define C_TYPE_slave_uuid castle_slave_uuid +#define C_TYPE_version castle_version +#define C_TYPE_size size_t +#define C_TYPE_string const char * +#define C_TYPE_collection_id castle_collection +#define C_TYPE_env_var castle_env_var_id +#define C_TYPE_int int +#define C_TYPE_int32 int32_t + +#define CASTLE_IOCTL_0IN_0OUT(_id, _name) \ + int castle_##_id (castle_connection *conn); + +#define CASTLE_IOCTL_1IN_0OUT(_id, _name, _arg_1_t, _arg_1) \ + int castle_##_id (castle_connection *conn, C_TYPE_##_arg_1_t _arg_1); + +#define CASTLE_IOCTL_1IN_1OUT(_id, _name, _arg_1_t, _arg_1, _ret_1_t, _ret) \ + int castle_##_id (castle_connection *conn, C_TYPE_##_arg_1_t _arg_1, \ + C_TYPE_##_ret_1_t * _ret); + +#define CASTLE_IOCTL_2IN_0OUT(_id, _name, _arg_1_t, _arg_1, _arg_2_t, _arg_2) \ + int castle_##_id (castle_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1, C_TYPE_##_arg_2_t _arg_2); \ + +#define CASTLE_IOCTL_3IN_1OUT(_id, _name, \ + _arg_1_t, _arg_1, _arg_2_t, _arg_2, _arg_3_t, _arg_3, \ + _ret_1_t, _ret) \ + int castle_##_id (castle_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1, C_TYPE_##_arg_2_t _arg_2, C_TYPE_##_arg_3_t _arg_3, \ + C_TYPE_##_ret_1_t * _ret); + + +#define CASTLE_IOCTLS \ + CASTLE_IOCTL_1IN_1OUT( \ + claim, \ + CASTLE_CTRL_CLAIM, \ + uint32, dev, slave_uuid, id) \ + CASTLE_IOCTL_1IN_0OUT( \ + release, \ + CASTLE_CTRL_RELEASE, \ + slave_uuid, id) \ + CASTLE_IOCTL_1IN_1OUT( \ + attach, \ + CASTLE_CTRL_ATTACH, \ + version, version, uint32, dev) \ + CASTLE_IOCTL_1IN_0OUT( \ + detach, \ + CASTLE_CTRL_DETACH, \ + uint32, dev) \ + CASTLE_IOCTL_1IN_1OUT( \ + snapshot, \ + CASTLE_CTRL_SNAPSHOT, \ + uint32, dev, version, version) \ + CASTLE_IOCTL_3IN_1OUT( \ + collection_attach, \ + CASTLE_CTRL_COLLECTION_ATTACH, \ + version, version, string, name, size, name_length, collection_id, collection) \ + CASTLE_IOCTL_1IN_0OUT( \ + collection_detach, \ + CASTLE_CTRL_COLLECTION_DETACH, \ + collection_id, collection) \ + CASTLE_IOCTL_1IN_1OUT( \ + collection_snapshot, \ + CASTLE_CTRL_COLLECTION_SNAPSHOT, \ + collection_id, collection, version, version) \ + CASTLE_IOCTL_1IN_1OUT( \ + create, \ + CASTLE_CTRL_CREATE, \ + uint64, size, version, id) \ + CASTLE_IOCTL_1IN_1OUT( \ + clone, \ + CASTLE_CTRL_CLONE, \ + version, version, version, clone) \ + CASTLE_IOCTL_2IN_0OUT( \ + destroy, \ + CASTLE_CTRL_DESTROY, \ + version, version, int32, flag) \ + CASTLE_IOCTL_0IN_0OUT( \ + init, \ + CASTLE_CTRL_INIT) \ + CASTLE_IOCTL_2IN_0OUT( \ + fault, \ + CASTLE_CTRL_FAULT, \ + uint32, fault_id, uint32, fault_arg) \ + CASTLE_IOCTL_2IN_0OUT( \ + slave_evacuate, \ + CASTLE_CTRL_SLAVE_EVACUATE, \ + slave_uuid, id, uint32, force) \ + CASTLE_IOCTL_1IN_0OUT( \ + slave_scan, \ + CASTLE_CTRL_SLAVE_SCAN, \ + uint32, id) \ + CASTLE_IOCTL_1IN_0OUT( \ + thread_priority, \ + CASTLE_CTRL_THREAD_PRIORITY, \ + uint32, nice_value) + +#define PRIVATE_CASTLE_IOCTLS \ + CASTLE_IOCTL_3IN_1OUT( \ + environment_set, \ + CASTLE_CTRL_ENVIRONMENT_SET, \ + env_var, var_id, string, var_str, size, var_len, int, ret) \ + CASTLE_IOCTL_2IN_0OUT( \ + trace_setup, \ + CASTLE_CTRL_TRACE_SETUP, \ + string, dir_str, size, dir_len) \ + CASTLE_IOCTL_0IN_0OUT( \ + trace_start, \ + CASTLE_CTRL_TRACE_START) \ + CASTLE_IOCTL_0IN_0OUT( \ + trace_stop, \ + CASTLE_CTRL_TRACE_STOP) \ + CASTLE_IOCTL_0IN_0OUT( \ + trace_teardown, \ + CASTLE_CTRL_TRACE_TEARDOWN) \ + +CASTLE_IOCTLS +PRIVATE_CASTLE_IOCTLS + +#undef CASTLE_IOCTL_0IN_0OUT +#undef CASTLE_IOCTL_1IN_0OUT +#undef 
CASTLE_IOCTL_1IN_1OUT +#undef CASTLE_IOCTL_2IN_0OUT +#undef CASTLE_IOCTL_3IN_1OUT + +uint32_t castle_device_to_devno(const char *filename); +const char *castle_devno_to_device(uint32_t devno); + +/* Convenience methods which don't use the hated device number */ +int castle_claim_dev(castle_connection *conn, const char *filename, castle_slave_uuid *id_out); +int castle_attach_dev(castle_connection *conn, castle_version version, const char **filename_out) __attribute__((warn_unused_result)); +int castle_detach_dev(castle_connection *conn, const char *filename); +int castle_snapshot_dev(castle_connection *conn, const char *filename, castle_version *version_out); + +uint32_t castle_max_buffer_size(void); + +/* Shared buffer pool */ +typedef struct s_castle_shared_pool castle_shared_pool; +int castle_shared_pool_create(castle_connection* conn, size_t nsizes, size_t* sizes, size_t* quantities, castle_shared_pool** pool_out); +int castle_shared_pool_destroy(castle_shared_pool* pool); +int castle_shared_pool_lease(castle_shared_pool* pool, castle_buffer** buffer_out, unsigned long size); +int castle_shared_pool_release(castle_shared_pool* pool, castle_buffer* buffer, unsigned long size); + +/* Collection utils */ +int castle_collection_find(const char* name, castle_collection* collection_out); + +#endif /* __CASTLE_FRONT_H__ */ diff --git a/castle_convenience.c b/castle_convenience.c new file mode 100644 index 0000000..22b12ef --- /dev/null +++ b/castle_convenience.c @@ -0,0 +1,725 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "castle.h" + +uint32_t +castle_build_key_len(castle_key *key, size_t buf_len, int dims, const int *key_lens, const uint8_t * const*keys) { + int *lens = (int *)key_lens; + + if (!key_lens && dims) { + if (!keys) + abort(); + lens = alloca(dims * sizeof(lens[0])); + for (int i = 0; i < dims; i++) + lens[i] = strlen((const char *)keys); + } + + uint32_t needed = sizeof(castle_key) + sizeof(key->dims[0]) * dims + sizeof(*key->dims[0]) * dims; + for (int i = 0; i < dims; i++) + needed += lens[i]; + + if (!key || buf_len == 0 || !keys || buf_len < needed) + return needed; + + key->nr_dims = dims; + char *ptr = (char *)key + sizeof(*key) + sizeof(key->dims[0]) * dims; + for (int i = 0; i < dims; i++) { + key->dims[i] = (c_vl_key_t *)ptr; + key->dims[i]->length = lens[i]; + memcpy(key->dims[i]->key, keys[i], lens[i]); + ptr += sizeof(*key->dims[i]) + lens[i]; + } + + assert(ptr - (char *)key == (int64_t)needed); + + return needed; +} + +uint32_t +castle_build_key(castle_key *key, size_t buf_len, int dims, const int *key_lens, const uint8_t * const*keys) { + uint32_t needed = castle_build_key_len(key, buf_len, dims, key_lens, keys); + if (needed <= buf_len) + return 0; + else + return needed; +} + +uint32_t +castle_key_bytes_needed(int dims, const int *key_lens, const uint8_t * const*keys) { + return castle_build_key(NULL, 0, dims, key_lens, keys); +} + +castle_key * +castle_malloc_key(int dims, const int *key_lens, const uint8_t * const*keys) { + uint32_t len = castle_key_bytes_needed(dims, key_lens, keys); + castle_key *key = malloc(len); + if (!key) + return NULL; + if (0 != castle_build_key(key, len, dims, key_lens, keys)) + abort(); + return key; +} + +static int make_key_buffer(castle_connection *conn, castle_key *key, uint32_t extra_space, char **key_buf_out, uint32_t *key_len_out) { + int dims = key->nr_dims; + int lens[dims]; + uint8_t *keys[dims]; + char *key_buf; + uint32_t key_len; + int err; 
+ + for (int i = 0; i < dims; i++) { + lens[i] = key->dims[i]->length; + keys[i] = key->dims[i]->key; + } + + key_len = castle_key_bytes_needed(dims, lens, NULL); + + err = castle_shared_buffer_create(conn, &key_buf, key_len + extra_space); + if (err) + return err; + + { + int r = castle_build_key((castle_key *)key_buf, key_len, dims, lens, (const uint8_t *const *)keys); + if (r != 0) + /* impossible */ + abort(); + } + + *key_buf_out = key_buf; + *key_len_out = key_len; + return 0; +} + +static int make_2key_buffer(castle_connection *conn, castle_key *key1, castle_key *key2, char **key_buf_out, uint32_t *key1_len_out, uint32_t *key2_len_out) { + int dims1 = key1->nr_dims; + int dims2 = key2->nr_dims; + int lens1[dims1]; + int lens2[dims2]; + uint8_t *keys1[dims1]; + uint8_t *keys2[dims2]; + char *key_buf; + uint32_t key1_len; + uint32_t key2_len; + int err; + + for (int i = 0; i < dims1; i++) { + lens1[i] = key1->dims[i]->length; + keys1[i] = key1->dims[i]->key; + } + + for (int i = 0; i < dims2; i++) { + lens2[i] = key2->dims[i]->length; + keys2[i] = key2->dims[i]->key; + } + + key1_len = castle_key_bytes_needed(dims1, lens1, NULL); + key2_len = castle_key_bytes_needed(dims2, lens2, NULL); + + err = castle_shared_buffer_create(conn, &key_buf, key1_len + key2_len); + if (err) + return err; + + { + int r = castle_build_key((castle_key *)key_buf, key1_len, dims1, lens1, (const uint8_t *const *)keys1); + if (r != 0) + /* impossible */ + abort(); + } + + { + int r = castle_build_key((castle_key *)(key_buf + key1_len), key2_len, dims2, lens2, (const uint8_t *const *)keys2); + if (r != 0) + /* impossible */ + abort(); + } + + *key_buf_out = key_buf; + *key1_len_out = key1_len; + *key2_len_out = key2_len; + return 0; +} + +/* These two functions are for copying keys out of shared buffers; keys supplied by the user are not contiguous */ + +/* + * Assumes key is contiguous in memory + */ +static int copy_key(c_vl_okey_t *key, void *buf, uint32_t key_len) +{ + c_vl_okey_t *new_key = buf; + unsigned int i; + + memcpy(buf, key, key_len); + + if ((new_key->nr_dims * sizeof(c_vl_key_t *)) > (key_len - sizeof(c_vl_okey_t))) + { + return -EINVAL; + } + + for (i=0; i < new_key->nr_dims; i++) + new_key->dims[i] = (void *) (((unsigned long) new_key->dims[i]) - + ((unsigned long) key) + ((unsigned long) buf)); + + return 0; +} + +#define max(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) + +/* + * Assumes key is contiguous in memory + */ +static uint32_t get_key_len(c_vl_okey_t *key) +{ + + uint32_t i; + unsigned long end = 0; + + for (i=0; i < key->nr_dims; i++) + end = max(end, ((unsigned long) key->dims[i]) + sizeof(c_vl_key_t) + key->dims[i]->length); + + return end - (unsigned long) key; +} + +int castle_get(castle_connection *conn, + collection_id_t collection, + castle_key *key, + char **value_out, uint32_t *value_len_out) +{ + struct castle_blocking_call call; + castle_request_t req; + char *key_buf, *val_buf; + int err = 0; + uint32_t key_len; + uint32_t val_len = PAGE_SIZE; + char *value; + + err = make_key_buffer(conn, key, 0, &key_buf, &key_len); + if (err) goto err0; + + err = castle_shared_buffer_create(conn, &val_buf, val_len); + if (err) goto err1; + + castle_get_prepare(&req, collection, (castle_key *) key_buf, + key_len, val_buf, val_len); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err2; + + if (call.length > val_len) + { + castle_interface_token_t token; + uint64_t val_len_64; + uint32_t remaining, buf_len; + char *buf; + + err = castle_big_get(conn, collection, key, &token, &val_len_64); + if (err) goto err2; + + /* We can't assign val_len_64 to value_len_out unless val_len_64 fits */ + if (val_len_64 > UINT32_MAX) { + err = -EFBIG; + goto err1; + } + + value = malloc(val_len_64); + if (!value) + { + err = -ENOMEM; + goto err2; + } + + remaining = val_len_64; + + while (remaining > 0) + { + err = castle_get_chunk(conn, token, &buf, &buf_len); + if (err) + { + free(value); + goto err2; + } + + memcpy(value + (val_len_64 - remaining), buf, buf_len); + free(buf); + remaining -= buf_len; + } + + *value_len_out = val_len_64; + *value_out = value; + } + else + { + assert(call.length <= UINT32_MAX); + value = malloc(call.length); + if (!value) + { + err = -ENOMEM; + goto err2; + } + + memcpy(value, val_buf, call.length); + + *value_len_out = call.length; + *value_out = value; + } + +err2: castle_shared_buffer_destroy(conn, val_buf, val_len); +err1: castle_shared_buffer_destroy(conn, key_buf, key_len); +err0: return err; +} + +int castle_replace(castle_connection *conn, + collection_id_t collection, + castle_key *key, + char *val, uint32_t val_len) +{ + struct castle_blocking_call call; + castle_request_t req; + char *buf; + uint32_t key_len; + int err = 0; + + err = make_key_buffer(conn, key, val_len, &buf, &key_len); + if (err) goto err0; + + memcpy(buf + key_len, val, val_len); + + castle_replace_prepare(&req, collection, (castle_key *) buf, + key_len, buf + key_len, val_len); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + +err1: castle_shared_buffer_destroy(conn, buf, key_len + val_len); +err0: return err; +} + +int castle_remove(castle_connection *conn, + collection_id_t collection, + castle_key *key) +{ + struct castle_blocking_call call; + castle_request_t req; + char *key_buf; + uint32_t key_len; + int err = 0; + + err = make_key_buffer(conn, key, 0, &key_buf, &key_len); + if (err) goto err0; + + castle_remove_prepare(&req, collection, + (castle_key *) key_buf, key_len); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + +err1: castle_shared_buffer_destroy(conn, key_buf, key_len); +err0: return err; +} + +int castle_iter_start(castle_connection *conn, + collection_id_t collection, + castle_key *start_key, + castle_key *end_key, + castle_interface_token_t *token_out) +{ + struct castle_blocking_call call; + castle_request_t req; + char *key_buf; + 
uint32_t start_key_len; + uint32_t end_key_len; + int err = 0; + + *token_out = 0; + + err = make_2key_buffer(conn, start_key, end_key, &key_buf, &start_key_len, &end_key_len); + if (err) goto err0; + + castle_iter_start_prepare(&req, collection, + (castle_key *) key_buf, start_key_len, + (castle_key *) ((unsigned long)key_buf + (unsigned long)start_key_len), end_key_len, + CASTLE_RING_ITER_FLAG_NONE); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + *token_out = call.token; + +err1: castle_shared_buffer_destroy(conn, key_buf, start_key_len + end_key_len); +err0: return err; +} + +void castle_kvs_free(struct castle_key_value_list *kvs) +{ + while (kvs) + { + struct castle_key_value_list *next = kvs->next; + + if (kvs->key) free(kvs->key); + if (kvs->val->val) free(kvs->val->val); + if (kvs->val) free(kvs->val); + free(kvs); + + kvs = next; + } +} + +int castle_iter_next(castle_connection *conn, + castle_interface_token_t token, + struct castle_key_value_list **kvs, + uint32_t buf_size) +{ + struct castle_blocking_call call; + castle_request_t req; + struct castle_key_value_list *head = NULL, *tail = NULL, *copy = NULL, *curr = NULL; + char *buf; + int err = 0; + + *kvs = NULL; + + err = castle_shared_buffer_create(conn, &buf, buf_size); + if (err) goto err0; + + castle_iter_next_prepare(&req, token, buf, buf_size); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + curr = (struct castle_key_value_list *)buf; + + // NULL first key means no entries + if (curr->key == NULL) + { + head = NULL; + } + else + { + while (curr != NULL) + { + unsigned long key_len = get_key_len(curr->key); + + copy = calloc(1, sizeof(*copy)); + if (!copy) + { + err = -ENOMEM; + goto err2; + } + + copy->key = malloc(key_len); + if (!copy->key) + { + err = -ENOMEM; + goto err2; + } + err = copy_key(curr->key, copy->key, key_len); + if (err) goto err2; + + copy->val = malloc(sizeof(*(copy->val))); + if (!copy->val) + { + err = -ENOMEM; + goto err2; + } + memcpy(copy->val, curr->val, sizeof(*(copy->val))); + + if (curr->val->type & CVT_TYPE_INLINE) + { + copy->val->val = malloc(copy->val->length); + if (!copy->val->val) + { + err = -ENOMEM; + goto err2; + } + memcpy(copy->val->val, curr->val->val, copy->val->length); + } + else + { + char *val; + uint32_t val_len; + err = castle_get(conn, curr->val->collection_id, curr->key, &val, &val_len); + if (err) + goto err2; + copy->val->length = val_len; + copy->val->val = (uint8_t *)val; + + /* pretend it is inline since the value is now 'inline' */ + copy->val->type = CVT_TYPE_INLINE; + } + + if (!head) + head = copy; + else + tail->next = copy; + + tail = copy; + curr = curr->next; + } + } + + *kvs = head; + + castle_shared_buffer_destroy(conn, buf, buf_size); + + return 0; + +err2: castle_kvs_free(head); +err1: castle_shared_buffer_destroy(conn, buf, buf_size); +err0: return err; +} + +int castle_iter_finish(castle_connection *conn, + castle_token token) +{ + struct castle_blocking_call call; + castle_request_t req; + int err = 0; + + castle_iter_finish_prepare(&req, token); + + err = castle_request_do_blocking(conn, &req, &call); + + return err; +} + +// 'limit' means the maximum number of values to retrieve. 0 means unlimited. 
+int castle_getslice(castle_connection *conn, + collection_id_t collection, + castle_key *start_key, + castle_key *end_key, + struct castle_key_value_list **kvs_out, + uint32_t limit) +{ + castle_token token; + int ret; + uint32_t count = 0; + struct castle_key_value_list *head = NULL, *tail = NULL, *curr = NULL; + + ret = castle_iter_start(conn, collection, start_key, + end_key, &token); + if (ret) goto err0; + + while (!ret && (limit == 0 || count < limit)) + { + ret = castle_iter_next(conn, token, &curr, PAGE_SIZE); + if (ret) goto err1; + + if (!curr) + break; + + if (!head) + head = curr; + else + tail->next = curr; + + while (curr) + { + count++; + if (count == limit) + { + if (curr->next) + castle_kvs_free(curr->next); + curr->next = NULL; + break; + } + tail = curr; // tail will be one behind curr + curr = curr->next; + } + } + + ret = castle_iter_finish(conn, token); + if (ret) + goto err1; + + *kvs_out = head; + + return ret; + +err1: + if (head) + castle_kvs_free(head); +err0: + return ret; +} + +int castle_big_put (castle_connection *conn, + collection_id_t collection, + castle_key *key, + uint64_t val_length, + castle_interface_token_t *token_out) +{ + struct castle_blocking_call call; + castle_request_t req; + char *key_buf; + uint32_t key_len; + int err = 0; + + *token_out = 0; + + err = make_key_buffer(conn, key, 0, &key_buf, &key_len); + if (err) goto err0; + + castle_big_put_prepare(&req, collection, + (castle_key *) key_buf, key_len, val_length); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + *token_out = call.token; + +err1: castle_shared_buffer_destroy(conn, key_buf, key_len); +err0: return err; +} + +int castle_put_chunk (castle_connection *conn, + castle_interface_token_t token, + char *value, uint32_t value_len) +{ + struct castle_blocking_call call; + castle_request_t req; + char *buf; + int err = 0; + + err = castle_shared_buffer_create(conn, &buf, value_len); + if (err) goto err0; + + memcpy(buf, value, value_len); + + castle_put_chunk_prepare(&req, token, buf, value_len); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + err1: castle_shared_buffer_destroy(conn, buf, value_len); + err0: return err; +} + +int castle_big_get (castle_connection *conn, + collection_id_t collection, + castle_key *key, + castle_interface_token_t *token_out, uint64_t *value_len_out) +{ + struct castle_blocking_call call; + castle_request_t req; + char *key_buf; + uint32_t key_len; + int err = 0; + + *token_out = 0; + + err = make_key_buffer(conn, key, 0, &key_buf, &key_len); + if (err) goto err0; + + castle_big_get_prepare(&req, collection, + (castle_key *) key_buf, key_len); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + *token_out = call.token; + *value_len_out = call.length; + +err1: castle_shared_buffer_destroy(conn, key_buf, key_len); +err0: return err; +} + +#define VALUE_LEN (1024 * 1024) +int castle_get_chunk (castle_connection *conn, + castle_interface_token_t token, + char **value_out, uint32_t *value_len_out) +{ + struct castle_blocking_call call; + castle_request_t req; + char *buf; + char *value; + int err = 0; + + *value_out = NULL; + + err = castle_shared_buffer_create(conn, &buf, VALUE_LEN); + if (err) goto err0; + + castle_get_chunk_prepare(&req, token, buf, VALUE_LEN); + + err = castle_request_do_blocking(conn, &req, &call); + if (err) goto err1; + + value = malloc(VALUE_LEN); + memcpy(value, buf, VALUE_LEN); + + *value_out = value; + *value_len_out = call.length; + 
+ err1: castle_shared_buffer_destroy(conn, buf, VALUE_LEN); + err0: return err; +} + +uint32_t +castle_device_to_devno(const char *filename) { + struct stat st; + if (0 != stat(filename, &st)) + return 0; + + return st.st_rdev; +} + +static char **devnames = NULL; +static int devname_count = 0; + +static void +alloc_devnames_to(int minor) { + if (devname_count > minor) + return; + + int old_devname_count = devname_count; + devname_count = minor + 1; + devnames = realloc(devnames, devname_count * sizeof(devnames[0])); + for (int i = old_devname_count; i < devname_count; i++) { + if (-1 == asprintf(&devnames[i], "/dev/castle-fs/castle-fs-%d", i)) + abort(); + } +} + +const char * +castle_devno_to_device(uint32_t devno) { + int minor = minor(devno); + /* This is a bit wrong, but it'll do for now. castle-fs gets some + arbitrary major assigned, and then names its devices based on the + minor. We can find the path from that */ + + alloc_devnames_to(minor); + return devnames[minor]; +} + +int +castle_claim_dev(castle_connection *conn, const char *filename, castle_slave_uuid *id_out) { + return castle_claim(conn, castle_device_to_devno(filename), id_out); +} + +int +castle_attach_dev(castle_connection *conn, castle_version version, const char **filename_out) { + uint32_t devno; + int ret = castle_attach(conn, version, &devno); + if (ret == 0) + *filename_out = castle_devno_to_device(devno); + return ret; +} + +int +castle_detach_dev(castle_connection *conn, const char *filename) { + return castle_detach(conn, castle_device_to_devno(filename)); +} + +int +castle_snapshot_dev(castle_connection *conn, const char *filename, castle_version *version_out) { + return castle_snapshot(conn, castle_device_to_devno(filename), version_out); +} diff --git a/castle_front.c b/castle_front.c new file mode 100644 index 0000000..c72ff8c --- /dev/null +++ b/castle_front.c @@ -0,0 +1,641 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "castle_public.h" +#include "castle.h" + +#include "castle_private.h" + +//#define DEBUG +#ifndef DEBUG +#define debug(_f, ...) ((void)0) +#else +#define debug(_f, _a...) (printf(_f, ##_a)) +#endif + +#define atomic_inc(x) ({int z __attribute__((unused)); z = __sync_fetch_and_add(x, 1); }) +#define atomic_dec(x) ({int z __attribute__((unused)); z = __sync_fetch_and_sub(x, 1); }) + +static pthread_mutex_t blocking_call_mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t blocking_call_cond = PTHREAD_COND_INITIALIZER; + +static void *castle_response_thread(void *data) +{ + castle_connection *conn = data; + castle_response_t *resp; + RING_IDX i, rp; + fd_set readfds; + int ret, more_to_do; + int max_fd = conn->select_pipe[0] > conn->fd ? conn->select_pipe[0] : conn->fd; + + while (!conn->response_thread_exit) + { + debug("pre-select %d\n", conn->fd); + + /* select destroys readfds, so must create it every time */ + FD_ZERO(&readfds); + FD_SET(conn->fd, &readfds); + FD_SET(conn->select_pipe[0], &readfds); + + ret = select(max_fd + 1, &readfds, (fd_set *)NULL, (fd_set *)NULL, NULL); + if (ret <= 0) + { + debug("select returned %d\n", ret); + continue; + } + + if (conn->response_thread_exit) + break; + +#ifdef TRACE + selects_counter++; +#endif + + debug("post-select\n"); + + do { + /* rsp_prod is written from the kernel, but in a strictly + ordered way and it fits inside a cache line. 
Reading it + at any point is safe */ + rp = conn->front_ring.sring->rsp_prod; + + /* This memory barrier is copied from Xen, which runs on + powerpc, which has weak memory ordering. We only run on + amd64, which has strong memory ordering: in particular, + reads are never reordered with respect to other reads. We + suspect that this is nothing more than a waste of a few + hundred cycles. Revisit this if we have performance + issues here - perhaps it can be safely removed */ + xen_rmb(); + + /* rsp_cons is safe for concurrency; only read or written from this thread */ + for (i = conn->front_ring.rsp_cons; i != rp; i++) { + resp = RING_GET_RESPONSE(&conn->front_ring, i); + + if (__builtin_expect(conn->debug_log != NULL, 0)) { + flockfile(conn->debug_log); + castle_print_response(conn->debug_log, resp, conn->debug_values); + fprintf(conn->debug_log, "\n"); + fflush(conn->debug_log); + funlockfile(conn->debug_log); + } + + if(conn->callbacks[resp->call_id].callback) + conn->callbacks[resp->call_id].callback(conn, resp, conn->callbacks[resp->call_id].data); + + if (conn->callbacks[resp->call_id].token) { + unsigned int x = conn->callbacks[resp->call_id].token % CASTLE_STATEFUL_OPS; + assert(x < CASTLE_STATEFUL_OPS); + assert(conn->outstanding_stateful_requests[x] > 0); + int new = __sync_sub_and_fetch(&conn->outstanding_stateful_requests[x], 1); + if (new == 0) + atomic_inc(&conn->front_ring.reserved); + } + + pthread_mutex_lock(&conn->free_mutex); + list_add(&conn->callbacks[resp->call_id].list, &conn->free_callbacks); + pthread_mutex_unlock(&conn->free_mutex); + + debug("Got response %d\n", resp->call_id); + +#ifdef TRACE + ops_counter++; +#endif + } + + conn->front_ring.rsp_cons = i; + assert(conn->front_ring.reserved <= RING_FREE_REQUESTS(&conn->front_ring)); + + RING_FINAL_CHECK_FOR_RESPONSES(&conn->front_ring, more_to_do); + + pthread_mutex_lock(&conn->ring_mutex); + pthread_cond_broadcast(&conn->ring_cond); + pthread_mutex_unlock(&conn->ring_mutex); + + } while (more_to_do); + } + + debug("castle_response_thread exiting...\n"); + + pthread_mutex_lock(&conn->ring_mutex); + conn->response_thread_running = 0; + pthread_cond_broadcast(&conn->ring_cond); + pthread_mutex_unlock(&conn->ring_mutex); + + return NULL; +} + +int castle_shared_buffer_create(castle_connection *conn, + char **buffer_out, unsigned long size) +{ + void *buffer; + buffer = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, conn->fd, 0); + if (buffer == MAP_FAILED) + { + debug("Failed to map page %d\n", errno); + return -errno; + } + + // TODO keep track of buffers to free up + + *buffer_out = buffer; + + return 0; +} + +int castle_shared_buffer_destroy(castle_connection *conn __attribute__((unused)), + char *buffer, unsigned long size) +{ + int ret = munmap(buffer, size); + + if (ret == -1) + return -errno; + + return 0; +} + +int castle_shared_buffer_allocate(castle_connection *conn, + castle_buffer **buffer_out, unsigned long size) +{ + castle_buffer* buffer = calloc(1, sizeof(*buffer)); + if(!buffer) + return -ENOMEM; + + int rc = 0; + if((rc = castle_shared_buffer_create(conn, &buffer->buf, size))) + { + debug("Failed to create shared buffer: %d\n", rc); + free(buffer); + return rc; + } + + buffer->buflen = size; + *buffer_out = buffer; + return 0; +} + +int castle_shared_buffer_release(castle_connection *conn, castle_buffer* buffer) +{ + int rc = castle_shared_buffer_destroy(conn, buffer->buf, buffer->buflen); + if(!rc) + free(buffer); + return rc; +} + +static int set_non_blocking(int fd) +{ + int flags; + + if 
(-1 == (flags = fcntl(fd, F_GETFL, 0))) + flags = 0; + + return fcntl(fd, F_SETFL, flags | O_NONBLOCK); +} + +int castle_connect(castle_connection **conn_out) +{ + int err; + castle_connection *conn = calloc(1, sizeof(*conn)); + + *conn_out = NULL; + + if (!conn) + { + debug("Failed to malloc\n"); + err = -ENOMEM; + goto err0; + } + + conn->fd = open(CASTLE_NODE, O_RDWR); + if (conn->fd == -1) + { + debug("Failed to open %s, errno=%d (\"%s\")\n", + CASTLE_NODE, errno, strerror(errno)); + err = -errno; + goto err1; + } + debug("Got fd %d\n", conn->fd); + + { + int version = castle_protocol_version(conn); + if (version != CASTLE_PROTOCOL_VERSION) { + debug("Protocol version mismatch (kernel %d, libcastle %d)\n", version, CASTLE_PROTOCOL_VERSION); + err = -ENOPROTOOPT; + goto err2; + } + } + + conn->shared_ring = mmap(NULL, CASTLE_RING_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, conn->fd, 0); + if (conn->shared_ring == MAP_FAILED) + { + debug("Failed to map page errno=%d (\"%s\")\n", + errno, strerror(errno)); + err = -errno; + goto err2; + } + debug("Got shared ring at address %p\n", conn->shared_ring); + + FRONT_RING_INIT(&conn->front_ring, conn->shared_ring, CASTLE_RING_SIZE, CASTLE_STATEFUL_OPS); + + conn->callbacks = malloc(sizeof(struct castle_front_callback) * RING_SIZE(&conn->front_ring)); + if (!conn->callbacks) + { + debug("Failed to malloc callbacks!"); + err = -ENOMEM; + goto err3; + } + + INIT_LIST_HEAD(&conn->free_callbacks); + for (unsigned int i=0; ifront_ring); i++) + list_add(&conn->callbacks[i].list, &conn->free_callbacks); + + err = pthread_mutex_init(&conn->free_mutex, NULL); + if (err) + { + debug("Failed to create mutex, err=%d\n", err); + err = -err; + goto err4; + } + + err = pthread_mutex_init(&conn->ring_mutex, NULL); + if (err) + { + debug("Failed to create mutex, err=%d\n", err); + err = -err; + goto err5; + } + debug("Initialised mutex\n"); + + err = pthread_cond_init(&conn->ring_cond, NULL); + if (err) + { + debug("Failed to create condition, err=%d\n", err); + err = -err; + goto err6; + } + debug("Initialise condition\n"); + + if (pipe(conn->select_pipe) == -1) + { + debug("Failed to create pipe to unblock select, errno=%d (\"%s\")", + errno, strerror(errno)); + err = -errno; + goto err7; + } + + if (set_non_blocking(conn->select_pipe[0]) == -1) + { + debug("Failed to set non-block on fd %d, errno=%d (\"%s\")", + conn->select_pipe[0], errno, strerror(errno)); + err = -errno; + goto err8; + } + + if (set_non_blocking(conn->select_pipe[1]) == -1) + { + debug("Failed to set non-block on fd %d, errno=%d (\"%s\")", + conn->select_pipe[1], errno, strerror(errno)); + err = -errno; + goto err8; + } + + { + const char *debug_env = getenv("CASTLE_DEBUG"); + if (debug_env) { + const char *debug_file = getenv("CASTLE_DEBUG_FILE"); + const char *debug_fd = getenv("CASTLE_DEBUG_FD"); + if (debug_file) + conn->debug_log = fopen(debug_file, "a"); + else if (debug_fd) + conn->debug_log = fdopen(atoi(debug_fd), "a"); + else { + int err_fd = dup(2); + if (-1 != err_fd) + conn->debug_log = fdopen(err_fd, "a"); + } + if (getenv("CASTLE_DEBUG_VALUES")) + conn->debug_values = 1; + else + conn->debug_values = 0; + } + else + conn->debug_log = NULL; + } + + conn->response_thread_running = 1; + conn->response_thread_exit = 0; + err = pthread_create(&conn->response_thread, NULL, castle_response_thread, conn); + if (err) + { + debug("Failed to create response thread, err=%d\n", err); + err = -err; + goto err9; + } + debug("Response thread started\n"); + + *conn_out = conn; + + return 0; + 
+err9: fclose(conn->debug_log); +err8: close(conn->select_pipe[0]); close(conn->select_pipe[1]); +err7: pthread_cond_destroy(&conn->ring_cond); +err6: pthread_mutex_destroy(&conn->ring_mutex); +err5: pthread_mutex_destroy(&conn->free_mutex); +err4: free(conn->callbacks); +err3: munmap(conn->shared_ring, CASTLE_RING_SIZE); +err2: close(conn->fd); +err1: free(conn); +err0: return err; +} + +void castle_disconnect(castle_connection *conn) +{ + ssize_t write_ret; + + if (!conn) + return; + + if (conn->fd == -1) + return; + + /* It doesn't matter that this flag is not protected by the lock + * as long as the response thread eventually notices, and by + * writing to the pipe the select will now never block, so it + * should wake it up and notice eventually */ + conn->response_thread_exit = 1; + write_ret = write(conn->select_pipe[1], "\0", 1); + if (write_ret < 0) + printf("write failed in castle_front_disconnect, error %d.\n", errno); + + /* Wait for the response thread to go away */ + pthread_mutex_lock(&conn->ring_mutex); + while(conn->response_thread_running) + pthread_cond_wait(&conn->ring_cond, &conn->ring_mutex); + pthread_mutex_unlock(&conn->ring_mutex); + + // TODO: free buffers / wait for them to be free'd? + + pthread_mutex_lock(&conn->ring_mutex); + munmap(conn->shared_ring, CASTLE_RING_SIZE); + close(conn->fd); + conn->fd = -1; + pthread_mutex_unlock(&conn->ring_mutex); + + pthread_mutex_lock(&blocking_call_mutex); + pthread_cond_broadcast(&blocking_call_cond); + pthread_mutex_unlock(&blocking_call_mutex); + + close(conn->select_pipe[0]); close(conn->select_pipe[1]); +} + +void castle_free(castle_connection *conn) +{ + if (!conn) + return; + + if (conn->fd >= 0) + castle_disconnect(conn); + + pthread_cond_destroy(&conn->ring_cond); + pthread_mutex_destroy(&conn->ring_mutex); + pthread_mutex_destroy(&conn->free_mutex); + free(conn->callbacks); + free(conn); +} + +static castle_interface_token_t +get_request_token(castle_request_t *req) { + switch (req->tag) { + case CASTLE_RING_ITER_NEXT: + return req->iter_next.token; + case CASTLE_RING_ITER_FINISH: + return req->iter_finish.token; + case CASTLE_RING_PUT_CHUNK: + return req->put_chunk.token; + case CASTLE_RING_GET_CHUNK: + return req->get_chunk.token; + default: + return 0; + } +} + +static bool +ring_full_for(castle_connection *conn, castle_request_t *req) { + castle_interface_token_t token = get_request_token(req); + + if (token) { + unsigned int x = token % CASTLE_STATEFUL_OPS; + assert(x < CASTLE_STATEFUL_OPS); + if (conn->outstanding_stateful_requests[x] == 0) + return false; + } + + int space = RING_FREE_REQUESTS(&conn->front_ring); + int reserved = conn->front_ring.reserved; + /* space < reserved when we've bumped the reserve count for a new + reponse but haven't updated the ring yet */ + return space <= reserved; +} + +void castle_request_send(castle_connection *conn, + castle_request_t *req, castle_callback *callbacks, + void **datas, int reqs_count) +{ + // TODO check return codes? + int notify, i=0, call_id; + struct castle_front_callback *callback; + + /* This mutex is currently being abused for two distinct purposes, + creating false scheduling hazards: it is both the condition + variable mutex for ring_cond, which is used for signalling + between the response thread and this function, and it is also + used to protect the value req_prod_pvt from simultaneous + executions of this function. 
+ + TODO: break it apart into two mutexes + + TODO+1: change req_prod_pvt to be a lock-free atomic + compare-and-set mechanism instead of using mutexes, so that + multiple threads can write to the ring without context switches + */ + pthread_mutex_lock(&conn->ring_mutex); + + while (i < reqs_count) + { + if (conn->fd < 0) + break; + + /* RING_FULL is based on nr_ents (safe), rsp_cons (written only + by the response thread and always within a cache line, hence + safe), and req_prod_pvt (currently a concurrency hazard due + to lack of atomic compare-and-set logic) */ + + while (ring_full_for(conn, &req[i])) + pthread_cond_wait(&conn->ring_cond, &conn->ring_mutex); + + /* Another RING_FULL hazard on req_prod_pvt */ + while (i < reqs_count && !ring_full_for(conn, &req[i])) + { + pthread_mutex_lock(&conn->free_mutex); + assert(!list_empty(&conn->free_callbacks)); + callback = list_entry(conn->free_callbacks.next, struct castle_front_callback, list); + list_del(&callback->list); + pthread_mutex_unlock(&conn->free_mutex); + + call_id = callback - conn->callbacks; + req[i].call_id = call_id; + + callback->callback = callbacks ? callbacks[i] : NULL; + callback->data = datas ? datas[i] : NULL; + callback->token = get_request_token(&req[i]); + + if (__builtin_expect(conn->debug_log != NULL, 0)) { + flockfile(conn->debug_log); + castle_print_request(conn->debug_log, &req[i], conn->debug_values); + fprintf(conn->debug_log, "\n"); + fflush(conn->debug_log); + funlockfile(conn->debug_log); + } + + if (callback->token) { + unsigned int x = callback->token % CASTLE_STATEFUL_OPS; + assert(x < CASTLE_STATEFUL_OPS); + int old = __sync_fetch_and_add(&conn->outstanding_stateful_requests[x], 1); + if (old == 0) { + assert(conn->front_ring.reserved > 0); + atomic_dec(&conn->front_ring.reserved); + } + } + + /* More req_prod_pvt hazards */ + castle_request_t *ring_req = RING_GET_REQUEST(&conn->front_ring, conn->front_ring.req_prod_pvt); + debug("Putting request %d at position %d\n", call_id, conn->front_ring.req_prod_pvt); + conn->front_ring.req_prod_pvt++; + + memcpy(ring_req, req + i, sizeof(*ring_req)); + + i++; + } + + /* This uses req_prod (safe due to strict ordering guarantees) and req_prod_pvt (hazard) */ + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&conn->front_ring, notify); + + debug("notify=%d\n", notify); + + if (notify) + { +#ifdef TRACE + ioctls_counter++; +#endif + ioctl(conn->fd, CASTLE_IOCTL_POKE_RING); + } + } + + pthread_mutex_unlock(&conn->ring_mutex); +} + +static void castle_blocking_callback(castle_connection *conn __attribute__((unused)), + castle_response_t *resp, void *data) +{ + struct castle_blocking_call *call = data; + + call->err = resp->err; + call->token = resp->token; + call->length = resp->length; + + pthread_mutex_lock(&blocking_call_mutex); + call->completed = 1; + pthread_cond_broadcast(&blocking_call_cond); + pthread_mutex_unlock(&blocking_call_mutex); +} + +int castle_request_do_blocking(castle_connection *conn, + castle_request_t *req, + struct castle_blocking_call *blocking_call) +{ + /* + * Warning variables these will be on stack but used elsewhere, only safe as + * this function sleeps until they are finished with (see castle_blocking_callback) + */ + void *blocking_calls = blocking_call; + castle_callback callback = &castle_blocking_callback; + + blocking_call->completed = 0; + + castle_request_send(conn, req, &callback, &blocking_calls, 1); + + pthread_mutex_lock(&blocking_call_mutex); + while (conn->fd >= 0 && !blocking_call->completed) + 
pthread_cond_wait(&blocking_call_cond, &blocking_call_mutex); + pthread_mutex_unlock(&blocking_call_mutex); + + if (conn->fd < 0 && !blocking_call->completed) { + blocking_call->completed = 1; + blocking_call->err = EUNATCH; + } + + return blocking_call->err; +} + +int castle_request_do_blocking_multi(castle_connection *conn, + castle_request_t *req, + struct castle_blocking_call *blocking_call, + int count) +{ + int i; + void **blocking_calls; + castle_callback *callbacks; + + blocking_calls = malloc(sizeof(struct castle_blocking_call *) * count); + callbacks = malloc(sizeof(callbacks[0]) * count); + + for (i = 0; i < count; i++) + { + blocking_call[i].completed = 0; + blocking_calls[i] = &blocking_call[i]; + callbacks[i] = castle_blocking_callback; + } + + castle_request_send(conn, req, callbacks, blocking_calls, count); + + pthread_mutex_lock(&blocking_call_mutex); + for (i = 0; i < count; i++) + { + while (conn->fd >= 0 && !blocking_call[i].completed) + pthread_cond_wait(&blocking_call_cond, &blocking_call_mutex); + } + pthread_mutex_unlock(&blocking_call_mutex); + + free(blocking_calls); + free(callbacks); + + for (i = 0; i < count; i++) + if (conn->fd < 0 && !blocking_call[i].completed) { + blocking_call[i].completed = 1; + blocking_call[i].err = EUNATCH; + } + + for (i = 0; i < count; i++) + if (blocking_call[i].err) + return blocking_call[i].err; + + return 0; +} + +uint32_t +castle_max_buffer_size(void) { + return 1048576; +} diff --git a/castle_ioctl.c b/castle_ioctl.c new file mode 100644 index 0000000..6daf2f1 --- /dev/null +++ b/castle_ioctl.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "castle.h" +#include "castle_public.h" + +#include "castle_private.h" + +int castle_protocol_version(struct castle_front_connection *conn) { + struct castle_control_ioctl ctl; + int ret; + ctl.cmd = CASTLE_CTRL_PROTOCOL_VERSION; + + ret = ioctl(conn->fd, CASTLE_CTRL_PROTOCOL_VERSION_IOCTL, &ctl); + if (__builtin_expect(conn->debug_log != NULL, 0)) { + fprintf(conn->debug_log, "protocol_version() = %d, %d\n", ret, ctl.protocol_version.version); + fflush(conn->debug_log); + } + if (ret) + return -1; + + return ctl.protocol_version.version; +} + +#define C_PRINTF_uint32 "%u" +#define C_PRINTF_uint64 "%lu" +#define C_PRINTF_slave_uuid "%u" +#define C_PRINTF_version "%u" +#define C_PRINTF_size "%zu" +#define C_PRINTF_string "%s" +#define C_PRINTF_collection_id "%u" +#define C_PRINTF_env_var "%u" +#define C_PRINTF_int "%d" +#define C_PRINTF_int32 "%d" + +#define CASTLE_IOCTL_0IN_0OUT(_id, _name) \ +int castle_##_id (struct castle_front_connection *conn) \ +{ \ + struct castle_control_ioctl ctl; \ + int ret; \ + ctl.cmd = _name; \ + \ + ret = ioctl(conn->fd, _name##_IOCTL, &ctl); \ + if (__builtin_expect(conn->debug_log != NULL, 0)) { \ + fprintf(conn->debug_log, #_id "() = %d\n", ret); \ + fflush(conn->debug_log); \ + } \ + if (ret) \ + return errno; \ + \ + return ctl._id.ret; \ +} + +#define CASTLE_IOCTL_1IN_0OUT(_id, _name, _arg_1_t, _arg_1) \ +int castle_##_id (struct castle_front_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1) \ +{ \ + struct castle_control_ioctl ctl; \ + int ret; \ + ctl.cmd = _name; \ + ctl._id._arg_1 = _arg_1; \ + \ + ret = ioctl(conn->fd, _name##_IOCTL, &ctl); \ + if (__builtin_expect(conn->debug_log != NULL, 0)) { \ + fprintf(conn->debug_log, \ + #_id "(" #_arg_1 " = " C_PRINTF_##_arg_1_t ") = %d\n", \ + _arg_1, ret); \ + fflush(conn->debug_log); \ + } \ + if 
(ret) \ + return errno; \ + \ + return ctl._id.ret; \ +} + +#define CASTLE_IOCTL_1IN_1OUT(_id, _name, _arg_1_t, _arg_1, _ret_1_t, _ret) \ +int castle_##_id (struct castle_front_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1, \ + C_TYPE_##_ret_1_t * _ret##_out) \ +{ \ + struct castle_control_ioctl ctl; \ + int ret; \ + ctl.cmd = _name; \ + ctl._id._arg_1 = _arg_1; \ + \ + ret = ioctl(conn->fd, _name##_IOCTL, &ctl); \ + if (__builtin_expect(conn->debug_log != NULL, 0)) { \ + fprintf(conn->debug_log, #_id "(" #_arg_1 " = " C_PRINTF_##_arg_1_t \ + ", " #_ret " = " C_PRINTF_##_ret_1_t ") = %d\n", _arg_1, ctl._id.ret, ret); \ + fflush(conn->debug_log); \ + } \ + if (ret) \ + return errno; \ + \ + * _ret##_out = ctl._id._ret; \ + \ + return ctl._id.ret; \ +} \ + +#define CASTLE_IOCTL_2IN_0OUT(_id, _name, _arg_1_t, _arg_1, _arg_2_t, _arg_2) \ +int castle_##_id (struct castle_front_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1, C_TYPE_##_arg_2_t _arg_2) \ +{ \ + struct castle_control_ioctl ctl; \ + int ret; \ + ctl.cmd = _name; \ + ctl._id._arg_1 = _arg_1; \ + ctl._id._arg_2 = _arg_2; \ + \ + ret = ioctl(conn->fd, _name##_IOCTL, &ctl); \ + if (__builtin_expect(conn->debug_log != NULL, 0)) { \ + fprintf(conn->debug_log, \ + #_id "(" #_arg_1 " = " C_PRINTF_##_arg_1_t \ + ", " #_arg_2 " = " C_PRINTF_##_arg_2_t ") = %d\n", \ + _arg_1, _arg_2, ret); \ + fflush(conn->debug_log); \ + } \ + if (ret) \ + return errno; \ + \ + return ctl._id.ret; \ +} + +#define CASTLE_IOCTL_3IN_1OUT(_id, _name, _arg_1_t, _arg_1, _arg_2_t, _arg_2, \ + _arg_3_t, _arg_3, _ret_1_t, _ret) \ +int castle_##_id (struct castle_front_connection *conn, \ + C_TYPE_##_arg_1_t _arg_1, \ + C_TYPE_##_arg_2_t _arg_2, \ + C_TYPE_##_arg_3_t _arg_3, \ + C_TYPE_##_ret_1_t * _ret##_out) \ +{ \ + struct castle_control_ioctl ctl; \ + int ret; \ + \ + ctl.cmd = _name; \ + ctl._id._arg_1 = _arg_1; \ + ctl._id._arg_2 = _arg_2; \ + ctl._id._arg_3 = _arg_3; \ + \ + ret = ioctl(conn->fd, _name##_IOCTL, &ctl); \ + if (__builtin_expect(conn->debug_log != NULL, 0)) { \ + fprintf(conn->debug_log, #_id "(" #_arg_1 " = " C_PRINTF_##_arg_1_t \ + ", " #_arg_2 " = " C_PRINTF_##_arg_2_t \ + ", " #_arg_3 " = " C_PRINTF_##_arg_3_t \ + ", " #_ret " = " C_PRINTF_##_ret_1_t ") = %d\n", \ + _arg_1, _arg_2, _arg_3, ctl._id.ret, ret); \ + fflush(conn->debug_log); \ + } \ + if (ret) \ + return errno; \ + \ + * _ret##_out = ctl._id._ret; \ + \ + return ctl._id.ret; \ +} + +CASTLE_IOCTLS +PRIVATE_CASTLE_IOCTLS diff --git a/castle_print.c b/castle_print.c new file mode 100644 index 0000000..60da06c --- /dev/null +++ b/castle_print.c @@ -0,0 +1,139 @@ +#include +#include +#include + +#include "castle.h" + +/* Because it's awkward to keep track of the proper result when making + multiple stdio calls, these two handle it - assuming that 'len' + accumulates the length for return, -1 is returned on error, and... 
*/ + +/* ...you're calling a function which returns length on success */ +#define call_stdio_len(exp) ({ int r = (exp); if (r < 0) return -1; len += r; r; }) +/* ...you're calling a function which returns the character written on success */ +#define call_stdio_char(exp) ({ int r = (exp); if (r < 0) return -1; len += 1; r; }) + +static int +print_escaped(FILE *f, const char *str, unsigned int str_len) { + int len = 0; + for (unsigned int i = 0; i < str_len; i++) { + char c = str[i]; + if (isprint(c) && c != ',' && c != '(' && c != ')') { + call_stdio_char(fputc(c, f)); + } + else { + call_stdio_len(fprintf(f, "\\x%02hhx", (uint8_t)c)); + } + } + return len; +} + +int +castle_print_key(FILE *f, castle_key *key) { + int len = 0; + call_stdio_len(fprintf(f, "(")); + for (unsigned int i = 0; i < castle_key_dims(key); i++) { + if (i > 0) + call_stdio_char(fputc(',', f)); + + const uint8_t *elem = castle_key_elem_data(key, i); + uint32_t elem_len = castle_key_elem_len(key, i); + if (elem_len == 0) + call_stdio_len(fprintf(f, "(invalid zero-length element)")); + else + call_stdio_len(print_escaped(f, (const char *)elem, elem_len)); + } + call_stdio_char(fputc(')', f)); + return len; +} + +static const char *command_names[] = { + [CASTLE_RING_REPLACE] = "replace", + [CASTLE_RING_BIG_PUT] = "big_put", + [CASTLE_RING_PUT_CHUNK] = "put_chunk", + [CASTLE_RING_GET] = "get", + [CASTLE_RING_BIG_GET] = "big_get", + [CASTLE_RING_GET_CHUNK] = "get_chunk", + [CASTLE_RING_ITER_START] = "iter_start", + [CASTLE_RING_ITER_NEXT] = "iter_next", + [CASTLE_RING_ITER_FINISH] = "iter_finish", + [CASTLE_RING_REMOVE] = "remove", +}; + +int +castle_print_request(FILE *f, castle_request *req, int print_values) { + int len = 0; + call_stdio_len(fprintf(f, "%s(call_id=%u, ", command_names[req->tag], req->call_id)); + switch (req->tag) { + case CASTLE_RING_REPLACE: + { + char key_buf[req->replace.key_len]; + memcpy(key_buf, req->replace.key_ptr, req->replace.key_len); + call_stdio_len(fprintf(f, "collection=%u, key=", req->replace.collection_id)); + call_stdio_len(castle_print_key(f, req->replace.key_ptr)); + if (print_values) { + call_stdio_len(fprintf(f, ", value=")); + call_stdio_len(print_escaped(f, req->replace.value_ptr, req->replace.value_len)); + } + break; + } + case CASTLE_RING_BIG_PUT: + call_stdio_len(fprintf(f, "collection=%u, key=", req->big_put.collection_id)); + call_stdio_len(castle_print_key(f, req->big_put.key_ptr)); + call_stdio_len(fprintf(f, ", len=%llu", (long long unsigned)req->big_put.value_len)); + break; + case CASTLE_RING_PUT_CHUNK: + call_stdio_len(fprintf(f, "token=%u", req->put_chunk.token)); + if (print_values) { + call_stdio_len(fprintf(f, ", data=")); + call_stdio_len(print_escaped(f, req->put_chunk.buffer_ptr, req->put_chunk.buffer_len)); + } + break; + case CASTLE_RING_GET: + call_stdio_len(fprintf(f, "collection=%u, key=", req->get.collection_id)); + call_stdio_len(castle_print_key(f, req->get.key_ptr)); + call_stdio_len(fprintf(f, ", buffer=%p, buffer_len=%u", req->get.value_ptr, req->get.value_len)); + break; + case CASTLE_RING_BIG_GET: + call_stdio_len(fprintf(f, "collection=%u, key=", req->big_get.collection_id)); + call_stdio_len(castle_print_key(f, req->big_get.key_ptr)); + break; + case CASTLE_RING_GET_CHUNK: + call_stdio_len(fprintf(f, "token=%u, buffer=%p, buffer_len=%u", req->get_chunk.token, req->get_chunk.buffer_ptr, req->get_chunk.buffer_len)); + break; + case CASTLE_RING_ITER_START: + call_stdio_len(fprintf(f, "collection=%u, start_key=", req->iter_start.collection_id)); + 
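            /*
             * Illustrative example (request values invented, not from the patch):
             * given the format strings in this case, the escaping rules in
             * print_escaped() and the "%s(call_id=%u, " prefix printed above, an
             * iter_start request over two-part keys with
             * CASTLE_RING_ITER_FLAG_NO_VALUES set would come out roughly as:
             *
             *   iter_start(call_id=3, collection=7, start_key=(foo,bar), end_key=(foo,\xff), flags=no_values)
             */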
call_stdio_len(castle_print_key(f, req->iter_start.start_key_ptr)); + call_stdio_len(fprintf(f, ", end_key=")); + call_stdio_len(castle_print_key(f, req->iter_start.end_key_ptr)); + call_stdio_len(fprintf(f, ", flags=")); + if (req->iter_start.flags & ~CASTLE_RING_ITER_FLAG_NO_VALUES) + call_stdio_len(fprintf(f, "error(%llx)", (long long unsigned)req->iter_start.flags)); + else if (req->iter_start.flags & CASTLE_RING_ITER_FLAG_NO_VALUES) + call_stdio_len(fprintf(f, "no_values")); + else + call_stdio_len(fprintf(f, "none")); + break; + case CASTLE_RING_ITER_NEXT: + call_stdio_len(fprintf(f, "token=%u, buffer=%p, buffer_len=%u", req->iter_next.token, req->iter_next.buffer_ptr, req->iter_next.buffer_len)); + break; + case CASTLE_RING_ITER_FINISH: + call_stdio_len(fprintf(f, "token=%u", req->iter_next.token)); + break; + case CASTLE_RING_REMOVE: + call_stdio_len(fprintf(f, "collection=%u, key=", req->remove.collection_id)); + call_stdio_len(castle_print_key(f, req->remove.key_ptr)); + break; + default: + call_stdio_len(fprintf(f, "unknown(%x)", req->tag)); + break; + } + call_stdio_char(fputc(')', f)); + return len; +} + +/* TODO: implement print_values */ +int +castle_print_response(FILE *f, castle_response *resp, int print_values __attribute__((unused))) { + return fprintf(f, "response(call_id=%u, err=%u, length=%llu, token=%u)", resp->call_id, resp->err, (long long unsigned)resp->length, resp->token); +} diff --git a/castle_private.h b/castle_private.h new file mode 100644 index 0000000..99a914d --- /dev/null +++ b/castle_private.h @@ -0,0 +1,52 @@ +#ifndef __CASTLE_PRIVATE_H__ +#define __CASTLE_PRIVATE_H__ + +#include +#include + +#include "ring.h" +#include "list.h" + +#include "castle_public.h" + +DEFINE_RING_TYPES(castle, castle_request_t, castle_response_t); + +int castle_protocol_version(struct castle_front_connection *conn); + +struct castle_front_callback +{ + struct list_head list; + castle_callback callback; + void *data; + castle_interface_token_t token; +}; + +struct castle_front_connection +{ + int fd; /* tests rely on this being the first field */ + castle_sring_t *shared_ring; + castle_front_ring_t front_ring; + int next_call_id; + /* pointer to array of callback pointers, corresponding to requests on ring */ + + pthread_mutex_t free_mutex; + struct castle_front_callback *callbacks; + struct list_head free_callbacks; + + int outstanding_stateful_requests[CASTLE_STATEFUL_OPS]; + + pthread_t response_thread; + int response_thread_exit; + int response_thread_running; + + pthread_mutex_t ring_mutex; + pthread_cond_t ring_cond; + + /* pipe fds to wake up select in the response thread */ + int select_pipe[2]; + + FILE * debug_log; + int debug_values; +} PACKED; + +#endif /* __CASTLE_PRIVATE_H__ */ diff --git a/castle_public.h b/castle_public.h new file mode 100644 index 0000000..3f63241 --- /dev/null +++ b/castle_public.h @@ -0,0 +1,588 @@ +#ifndef __CASTLE_PUBLIC_H__ +#define __CASTLE_PUBLIC_H__ + +#include +#include +#ifndef __KERNEL__ +#include +#endif + +#define CASTLE_PROTOCOL_VERSION 7 + +#define PACKED __attribute__((packed)) + +#ifndef __KERNEL__ +#define PAGE_SIZE 4096 +#define PAGE_SHIFT 12 +/* These must be the same as castle.h in fs.hg */ +enum { + CVT_TYPE_INLINE = 0x10, + CVT_TYPE_ONDISK = 0x20, + CVT_TYPE_INVALID = 0x30, +}; +#endif + +typedef enum { + NO_FAULT, /* 0 */ + MERGE_FAULT, /* 1 */ + EXTENT_FAULT, /* 2 */ + FREESPACE_FAULT, /* 3 */ + REPLACE_FAULT, /* 4 */ + GET_FAULT, /* 5 */ + BIG_PUT_FAULT, /* 6 */ + BIG_GET_FAULT, /* 7 */ + CHECKPOINT_FAULT, /* 8 */ + 
CLAIM_FAULT, /* 9 */ + FS_INIT_FAULT, /*10 */ + FS_RESTORE_FAULT, /*11 */ + FINI_FAULT, /*12 */ + SLAVE_OOS_ERR, /*13 */ + REBUILD_FAULT1, /*14 Fault between extent remaps*/ + REBUILD_FAULT2, /*15 Fault in mid extent remap*/ +} c_fault_t; + +typedef enum { + BUILD_ID = 0, + LAST_ENV_VAR_ID, +} c_env_var_t; + +/** + * Trace providers. + */ +typedef enum { + TRACE_CACHE, /**< Cache events */ + TRACE_DA, /**< DA events */ + TRACE_DA_MERGE, /**< Merge events */ + TRACE_DA_MERGE_UNIT, /**< Merge unit events */ +} c_trc_prov_t; + +/** + * Event types. + */ +typedef enum { + TRACE_VALUE, /**< Value being reported */ + TRACE_MARK, /**< Event has occurred */ + TRACE_START, /**< Event has started */ + TRACE_END, /**< Event has ended */ +} c_trc_type_t; + +/** + * Cache trace variables. + */ +typedef enum { + TRACE_CACHE_CHECKPOINT_ID, /**< Checkpoint running. */ + TRACE_CACHE_DIRTY_PGS_ID, /**< Number of c2ps on the dirtylist. */ + TRACE_CACHE_CLEAN_PGS_ID, /**< Number of c2ps on the cleanlist. */ + TRACE_CACHE_FREE_PGS_ID, /**< Number of c2ps on the freelist. */ + TRACE_CACHE_RESERVE_PGS_ID, /**< Number of c2ps on the reserve freelist. */ + TRACE_CACHE_CLEAN_BLKS_ID, /**< Number of c2bs on the cleanlist. */ + TRACE_CACHE_FREE_BLKS_ID, /**< Number of c2bs on the freelist. */ + TRACE_CACHE_RESERVE_BLKS_ID, /**< Number of c2bs on the reserve freelist. */ + TRACE_CACHE_SOFTPIN_BLKS_ID, /**< Number of softpin c2bs in the cache. */ + TRACE_CACHE_BLOCK_VICTIMS_ID, /**< Number of c2bs evicted from the cache. */ + TRACE_CACHE_SOFTPIN_VICTIMS_ID, /**< Number of softpinned c2bs evicted from the cache. */ + TRACE_CACHE_READS_ID, /**< Number of reads this tick. */ + TRACE_CACHE_WRITES_ID, /**< Number of writes this tick. */ +} c_trc_cache_var_t; + +/** + * DA trace variables. + */ +typedef enum { + TRACE_DA_INSERTS_DISABLED_ID, /**< Whether inserts are enabled or not. */ + TRACE_DA_MERGE_ID, /**< Merge */ + TRACE_DA_MERGE_MODLIST_ITER_INIT_ID, /**< Modlist iter init */ + TRACE_DA_MERGE_UNIT_ID, /**< Merge unit */ + TRACE_DA_MERGE_UNIT_C2B_SYNC_WAIT_BT_NS_ID, + TRACE_DA_MERGE_UNIT_C2B_SYNC_WAIT_DATA_NS_ID, + TRACE_DA_MERGE_UNIT_GET_C2B_NS_ID, + TRACE_DA_MERGE_UNIT_MOBJ_COPY_NS_ID, +} c_trc_da_var_t; + +#define MERGE_START_FLAG (1U<<0) +#define MERGE_END_FLAG (1U<<1) + +/* Bump the magic version byte (LSB) when c_trc_evt_t changes. */ +#define CASTLE_TRACE_MAGIC 0xCAE5E10D +typedef struct castle_trace_event { + uint32_t magic; + struct timeval timestamp; + int cpu; /**< CPU ID that allocated structure. */ + c_trc_prov_t provider; /**< Event provider */ + c_trc_type_t type; /**< Event type */ + int var; /**< Event variable */ + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t v5; +} c_trc_evt_t; + +typedef uint32_t transfer_id_t; +typedef uint32_t slave_uuid_t; +typedef uint32_t collection_id_t; +typedef uint32_t version_t; /**< Version ID type, unique across all Doubling Arrays. 
*/ +#define INVAL_VERSION ((version_t)-1) +#define VERSION_INVAL(_v) ((_v) == INVAL_VERSION) + +/* And our IOCTL code is: */ +#define CASTLE_CTRL_IOCTL_TYPE (0xCA) + +/* Subtypes for CASTLE_CTRL_ used for IOCTLs */ +#define CASTLE_CTRL_CLAIM 1 +#define CASTLE_CTRL_RELEASE 2 +#define CASTLE_CTRL_ATTACH 3 +#define CASTLE_CTRL_DETACH 4 +#define CASTLE_CTRL_CREATE 5 +#define CASTLE_CTRL_CLONE 6 +#define CASTLE_CTRL_SNAPSHOT 7 +#define CASTLE_CTRL_INIT 8 +#define CASTLE_CTRL_TRANSFER_CREATE 11 +#define CASTLE_CTRL_TRANSFER_DESTROY 12 +#define CASTLE_CTRL_COLLECTION_ATTACH 13 +#define CASTLE_CTRL_COLLECTION_DETACH 14 +#define CASTLE_CTRL_COLLECTION_SNAPSHOT 15 +#define CASTLE_CTRL_RESERVE_FOR_TRANSFER 16 +#define CASTLE_CTRL_VALID_STATS 17 +#define CASTLE_CTRL_INVALID_STATS 18 +#define CASTLE_CTRL_SET_TARGET 19 +#define CASTLE_CTRL_DESTROY 20 +#define CASTLE_CTRL_PROTOCOL_VERSION 21 +#define CASTLE_CTRL_FAULT 22 +#define CASTLE_CTRL_ENVIRONMENT_SET 23 +#define CASTLE_CTRL_TRACE_SETUP 24 +#define CASTLE_CTRL_TRACE_START 25 +#define CASTLE_CTRL_TRACE_STOP 26 +#define CASTLE_CTRL_TRACE_TEARDOWN 27 +#define CASTLE_CTRL_SLAVE_EVACUATE 28 +#define CASTLE_CTRL_THREAD_PRIORITY 29 +#define CASTLE_CTRL_SLAVE_SCAN 30 + +typedef struct castle_control_cmd_claim { + uint32_t dev; /* IN */ + int ret; /* OUT */ + slave_uuid_t id; /* OUT */ +} cctrl_cmd_claim_t; + +typedef struct castle_control_cmd_release { + slave_uuid_t id; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_release_t; + +typedef struct castle_control_cmd_attach { + version_t version; /* IN */ + int ret; /* OUT */ + uint32_t dev; /* OUT */ +} cctrl_cmd_attach_t; + +typedef struct castle_control_cmd_detach { + uint32_t dev; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_detach_t; + +typedef struct castle_control_cmd_snapshot { + uint32_t dev; /* IN */ + int ret; /* OUT */ + version_t version; /* OUT */ +} cctrl_cmd_snapshot_t; + +typedef struct castle_control_cmd_collection_attach { + version_t version; /* IN */ + const char *name; /* IN */ + size_t name_length; /* IN */ + int ret; /* OUT */ + collection_id_t collection; /* OUT */ +} cctrl_cmd_collection_attach_t; + +typedef struct castle_control_cmd_collection_detach { + collection_id_t collection; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_collection_detach_t; + +typedef struct castle_control_cmd_collection_snapshot { + collection_id_t collection; /* IN */ + int ret; /* OUT */ + version_t version; /* OUT */ +} cctrl_cmd_collection_snapshot_t; + +typedef struct castle_control_cmd_create { + uint64_t size; /* IN */ + int ret; /* OUT */ + version_t id; /* OUT */ +} cctrl_cmd_create_t; + +enum { + CASTLE_DESTROY_TREE = 0, + CASTLE_DESTROY_VERSION = 1, +}; +typedef struct castle_control_cmd_destroy { + version_t version; /* IN */ + int flag; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_destroy_t; + +typedef struct castle_control_cmd_clone { + version_t version; /* IN */ + int ret; /* OUT */ + version_t clone; /* OUT */ +} cctrl_cmd_clone_t; + +typedef struct castle_control_cmd_init { + int ret; /* OUT */ +} cctrl_cmd_init_t; + +typedef struct castle_control_cmd_transfer_create { + version_t version; /* IN */ + uint32_t direction; /* IN */ + int ret; /* OUT */ + transfer_id_t id; /* OUT */ +} cctrl_cmd_transfer_create_t; + +typedef struct castle_control_cmd_transfer_destroy { + transfer_id_t id; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_transfer_destroy_t; + +typedef struct castle_control_cmd_protocol_version { + int ret; /* OUT */ + uint32_t version; /* OUT */ +} cctrl_cmd_protocol_version_t; + +typedef struct 
castle_control_cmd_environment_set { + c_env_var_t var_id; /* IN */ + const char *var_str; /* IN */ + size_t var_len; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_environment_set_t; + +typedef struct castle_control_cmd_fault { + c_fault_t fault_id; /* IN */ + uint32_t fault_arg; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_fault_t; + +typedef struct castle_control_cmd_trace_setup { + const char *dir_str; /* IN */ + size_t dir_len; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_trace_setup_t; + +typedef struct castle_control_cmd_trace_start { + int ret; /* OUT */ +} cctrl_cmd_trace_start_t; + +typedef struct castle_control_cmd_trace_stop { + int ret; /* OUT */ +} cctrl_cmd_trace_stop_t; + +typedef struct castle_control_cmd_trace_teardown { + int ret; /* OUT */ +} cctrl_cmd_trace_teardown_t; + +typedef struct castle_control_slave_evacuate { + slave_uuid_t id; /* IN */ + uint32_t force; /* IN */ + int ret; /* OUT */ +} PACKED cctrl_cmd_slave_evacuate_t; + +typedef struct castle_control_slave_scan { + slave_uuid_t id; /* IN */ + int ret; /* OUT */ +} PACKED cctrl_cmd_slave_scan_t; + +typedef struct castle_control_cmd_thread_priority { + int nice_value; /* IN */ + int ret; /* OUT */ +} cctrl_cmd_thread_priority_t; + +typedef struct castle_control_ioctl { + uint16_t cmd; + union { + cctrl_cmd_claim_t claim; + cctrl_cmd_release_t release; + cctrl_cmd_init_t init; + + cctrl_cmd_attach_t attach; + cctrl_cmd_detach_t detach; + cctrl_cmd_snapshot_t snapshot; + + cctrl_cmd_collection_attach_t collection_attach; + cctrl_cmd_collection_detach_t collection_detach; + cctrl_cmd_collection_snapshot_t collection_snapshot; + + cctrl_cmd_create_t create; + cctrl_cmd_destroy_t destroy; + cctrl_cmd_clone_t clone; + + cctrl_cmd_transfer_create_t transfer_create; + cctrl_cmd_transfer_destroy_t transfer_destroy; + + cctrl_cmd_protocol_version_t protocol_version; + cctrl_cmd_environment_set_t environment_set; + + cctrl_cmd_fault_t fault; + + cctrl_cmd_trace_setup_t trace_setup; + cctrl_cmd_trace_start_t trace_start; + cctrl_cmd_trace_stop_t trace_stop; + cctrl_cmd_trace_teardown_t trace_teardown; + + cctrl_cmd_slave_evacuate_t slave_evacuate; + cctrl_cmd_slave_scan_t slave_scan; + + cctrl_cmd_thread_priority_t thread_priority; + }; +} cctrl_ioctl_t; + +/* IOCTL definitions. 
*/ +enum { + CASTLE_CTRL_CLAIM_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_CLAIM, cctrl_ioctl_t), + CASTLE_CTRL_RELEASE_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_RELEASE, cctrl_ioctl_t), + CASTLE_CTRL_ATTACH_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_ATTACH, cctrl_ioctl_t), + CASTLE_CTRL_DETACH_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_DETACH, cctrl_ioctl_t), + CASTLE_CTRL_CREATE_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_CREATE, cctrl_ioctl_t), + CASTLE_CTRL_CLONE_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_CLONE, cctrl_ioctl_t), + CASTLE_CTRL_SNAPSHOT_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_SNAPSHOT, cctrl_ioctl_t), + CASTLE_CTRL_INIT_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_INIT, cctrl_ioctl_t), + CASTLE_CTRL_TRANSFER_CREATE_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRANSFER_CREATE, cctrl_ioctl_t), + CASTLE_CTRL_TRANSFER_DESTROY_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRANSFER_DESTROY, cctrl_ioctl_t), + CASTLE_CTRL_COLLECTION_ATTACH_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_COLLECTION_ATTACH, cctrl_ioctl_t), + CASTLE_CTRL_COLLECTION_DETACH_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_COLLECTION_DETACH, cctrl_ioctl_t), + CASTLE_CTRL_COLLECTION_SNAPSHOT_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_COLLECTION_SNAPSHOT, cctrl_ioctl_t), + CASTLE_CTRL_RESERVE_FOR_TRANSFER_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_RESERVE_FOR_TRANSFER, cctrl_ioctl_t), + CASTLE_CTRL_VALID_STATS_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_VALID_STATS, cctrl_ioctl_t), + CASTLE_CTRL_INVALID_STATS_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_INVALID_STATS, cctrl_ioctl_t), + CASTLE_CTRL_SET_TARGET_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_SET_TARGET, cctrl_ioctl_t), + CASTLE_CTRL_DESTROY_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_DESTROY, cctrl_ioctl_t), + CASTLE_CTRL_PROTOCOL_VERSION_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_PROTOCOL_VERSION, cctrl_ioctl_t), + CASTLE_CTRL_ENVIRONMENT_SET_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_ENVIRONMENT_SET, cctrl_ioctl_t), + CASTLE_CTRL_FAULT_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_FAULT, cctrl_ioctl_t), + CASTLE_CTRL_TRACE_SETUP_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRACE_SETUP, cctrl_ioctl_t), + CASTLE_CTRL_TRACE_START_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRACE_START, cctrl_ioctl_t), + CASTLE_CTRL_TRACE_STOP_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRACE_STOP, cctrl_ioctl_t), + CASTLE_CTRL_TRACE_TEARDOWN_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_TRACE_TEARDOWN, cctrl_ioctl_t), + CASTLE_CTRL_SLAVE_EVACUATE_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_SLAVE_EVACUATE, cctrl_ioctl_t), + CASTLE_CTRL_THREAD_PRIORITY_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_THREAD_PRIORITY, cctrl_ioctl_t), + CASTLE_CTRL_SLAVE_SCAN_IOCTL = + _IOWR(CASTLE_CTRL_IOCTL_TYPE, CASTLE_CTRL_SLAVE_SCAN, cctrl_ioctl_t), +}; + +/* + * Variable length key, for example used by the btree + */ + +typedef struct castle_var_length_key { + uint32_t length; + uint8_t key[]; +} PACKED c_vl_key_t; + +typedef struct castle_var_length_object_key { + uint32_t nr_dims; + c_vl_key_t *dims[]; +} PACKED c_vl_okey_t; + +#define CASTLE_RING_PAGES (16) /**< 64 requests/page. */ +#define CASTLE_RING_SIZE (CASTLE_RING_PAGES << PAGE_SHIFT) /**< Must be ^2 or things break. 
*/ + +#define CASTLE_STATEFUL_OPS 512 + +#define CASTLE_IOCTL_POKE_RING 2 +#define CASTLE_IOCTL_WAIT 3 + +#define CASTLE_RING_REPLACE 1 +#define CASTLE_RING_BIG_PUT 2 +#define CASTLE_RING_PUT_CHUNK 3 +#define CASTLE_RING_GET 4 +#define CASTLE_RING_BIG_GET 5 +#define CASTLE_RING_GET_CHUNK 6 +#define CASTLE_RING_ITER_START 7 +#define CASTLE_RING_ITER_NEXT 8 +#define CASTLE_RING_ITER_FINISH 9 +#define CASTLE_RING_ITER_SKIP 10 +#define CASTLE_RING_REMOVE 11 + +typedef uint32_t castle_interface_token_t; + +typedef struct castle_request_replace { + collection_id_t collection_id; + c_vl_okey_t *key_ptr; + uint32_t key_len; + void *value_ptr; + uint32_t value_len; +} castle_request_replace_t; + +typedef struct castle_request_remove { + collection_id_t collection_id; + c_vl_okey_t *key_ptr; + uint32_t key_len; +} castle_request_remove_t; + +typedef struct castle_request_get { + collection_id_t collection_id; + c_vl_okey_t *key_ptr; + uint32_t key_len; + void *value_ptr; /* where to put the result */ + uint32_t value_len; +} castle_request_get_t; + +typedef struct castle_request_iter_start { + collection_id_t collection_id; + c_vl_okey_t *start_key_ptr; + uint32_t start_key_len; + c_vl_okey_t *end_key_ptr; + uint32_t end_key_len; + uint64_t flags; +} castle_request_iter_start_t; + +#define CASTLE_RING_ITER_FLAG_NONE 0x0 +#define CASTLE_RING_ITER_FLAG_NO_VALUES 0x1 + +typedef struct castle_request_iter_next { + castle_interface_token_t token; + void *buffer_ptr; + uint32_t buffer_len; +} castle_request_iter_next_t; + +typedef struct castle_request_iter_finish { + castle_interface_token_t token; +} castle_request_iter_finish_t; + +typedef struct castle_request_big_get { + collection_id_t collection_id; + c_vl_okey_t *key_ptr; + uint32_t key_len; +} castle_request_big_get_t; + +typedef struct castle_request_get_chunk { + castle_interface_token_t token; + void *buffer_ptr; + uint32_t buffer_len; +} castle_request_get_chunk_t; + +typedef struct castle_request_big_put { + collection_id_t collection_id; + c_vl_okey_t *key_ptr; + uint32_t key_len; + uint64_t value_len; +} castle_request_big_put_t; + +typedef struct castle_request_put_chunk { + castle_interface_token_t token; + void *buffer_ptr; + uint32_t buffer_len; +} castle_request_put_chunk_t; + +typedef struct castle_request { + uint32_t call_id; + uint32_t tag; + union { + castle_request_replace_t replace; + castle_request_remove_t remove; + castle_request_get_t get; + + castle_request_big_get_t big_get; + castle_request_get_chunk_t get_chunk; + castle_request_big_put_t big_put; + castle_request_put_chunk_t put_chunk; + + castle_request_iter_start_t iter_start; + castle_request_iter_next_t iter_next; + castle_request_iter_finish_t iter_finish; + }; +} castle_request_t; + +typedef struct castle_response { + uint32_t call_id; + uint32_t err; + uint64_t length; + castle_interface_token_t token; +} castle_response_t; + +struct castle_iter_val { + uint64_t length; + uint8_t type; + union { + uint8_t *val; + collection_id_t collection_id; + }; +}; + +struct castle_key_value_list { + struct castle_key_value_list *next; + c_vl_okey_t *key; + struct castle_iter_val *val; +}; + + +#define CASTLE_SLAVE_MAGIC1 (0x02061985) +#define CASTLE_SLAVE_MAGIC2 (0x16071983) +#define CASTLE_SLAVE_MAGIC3 (0x16061981) +#define CASTLE_SLAVE_VERSION (13) + +#define CASTLE_SLAVE_TARGET (0x00000001) +#define CASTLE_SLAVE_SPINNING (0x00000002) +#define CASTLE_SLAVE_NEWDEV (0x00000004) +#define CASTLE_SLAVE_SSD (0x00000008) + +struct castle_slave_superblock_public { + /* align: 8 
*/ + /* offset: 0 */ uint32_t magic1; + /* 4 */ uint32_t magic2; + /* 8 */ uint32_t magic3; + /* 12 */ uint32_t version; /* Super chunk format version */ + /* 16 */ uint32_t uuid; + /* 20 */ uint32_t used; + /* 24 */ uint64_t size; /* In 4K blocks. */ + /* 32 */ uint32_t flags; + /* 36 */ uint32_t checksum; + /* 40 */ uint8_t _unused[88]; + /* 128 */ +} PACKED; + +#define CASTLE_FS_MAGIC1 (0x19731121) +#define CASTLE_FS_MAGIC2 (0x19880624) +#define CASTLE_FS_MAGIC3 (0x19821120) +#define CASTLE_FS_VERSION (1) + +struct castle_fs_superblock_public { + /* align: 4 */ + /* offset: 0 */ uint32_t magic1; + /* 4 */ uint32_t magic2; + /* 8 */ uint32_t magic3; + /* 12 */ uint32_t uuid; + /* 16 */ uint32_t version; /* Super chunk format version */ + /* 20 */ uint32_t salt; + /* 24 */ uint32_t peper; + /* 28 */ uint32_t checksum; + /* 32 */ uint8_t _unused[96]; + /* 128 */ +} PACKED; + +#endif /* __CASTLE_PUBLIC_H__ */ diff --git a/castle_utils.c b/castle_utils.c new file mode 100644 index 0000000..e120194 --- /dev/null +++ b/castle_utils.c @@ -0,0 +1,262 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "castle.h" +#include "castle_private.h" + +/* get next castle_buffer* in free-list */ +#define BUF_NEXT(x) (*(castle_buffer**)((x)->buf)) +/* min size of pooled buffer */ +#define MIN_SIZE sizeof(castle_buffer*) + +#define max(a, b) ((a)>(b)?(a):(b)) + +typedef struct pool_node +{ + size_t size; + castle_buffer* head; +} pool_node; + +struct s_castle_shared_pool +{ + pool_node* free; + size_t nsizes; + + pthread_mutex_t* lock; + pthread_cond_t* sig; + castle_connection* conn; +}; + +static int node_cmp(const void* a, const void* b) +{ + pool_node* l = (pool_node*)a, *r = (pool_node*)b; + return l->size < r->size ? -1 : l->size > r->size ? 
1 : 0; +} + +static pool_node* find_size_locked(castle_shared_pool* pool, size_t size, bool nonempty) +{ + size_t first = 0; + int last = pool->nsizes - 1; + if(pool->free[last].size < size) + return NULL; + + /* binary search for the least upper bound which contains the requested size */ + while(last >= (signed)first) + { + size_t test = first + (last-first)/2; + if(pool->free[test].size > size) + last = test - 1; + else if(pool->free[test].size < size) + first = test + 1; + else + { + first = test; + break; + } + } + /* increase size until we find a non-empty free-list */ + if(nonempty) + while(first < pool->nsizes && !pool->free[first].head) ++first; + + if(first < pool->nsizes) + return &pool->free[first]; + + /* all sufficiently large buffers are in use */ + return NULL; +} + +int castle_shared_pool_lease(castle_shared_pool* pool, castle_buffer** buffer, unsigned long size) +{ + if(!pool || !buffer || *buffer || size > pool->free[pool->nsizes-1].size) + return -EINVAL; + + pool_node* node = NULL; + + pthread_mutex_lock(pool->lock); + while(!(node = find_size_locked(pool, size, true))) + pthread_cond_wait(pool->sig, pool->lock); + + castle_buffer* head = node->head; + node->head = BUF_NEXT(head); + BUF_NEXT(head) = NULL; + + pthread_mutex_unlock(pool->lock); + *buffer = head; + return 0; +} + +int castle_shared_pool_release(castle_shared_pool* pool, castle_buffer* buffer, __attribute__((unused)) unsigned long size) +{ + if(!pool || !buffer) + return -EINVAL; + + pthread_mutex_lock(pool->lock); + + pool_node* node = find_size_locked(pool, buffer->buflen, false); + BUF_NEXT(buffer) = node->head; + node->head = buffer; + + pthread_cond_signal(pool->sig); + pthread_mutex_unlock(pool->lock); + return 0; +} + +int castle_shared_pool_create(castle_connection* conn, size_t nsizes, size_t* sizes, size_t* quantities, castle_shared_pool** pool_out) +{ + if(!conn || !nsizes || !sizes || !quantities || !pool_out || *pool_out) + return -EINVAL; + + castle_shared_pool* pool = (castle_shared_pool*)calloc(1, sizeof(*pool)); + pool->lock = (pthread_mutex_t*)calloc(1, sizeof(*pool->lock)); + pool->sig = (pthread_cond_t*)calloc(1, sizeof(*pool->sig)); + + pthread_mutex_init(pool->lock, NULL); + pthread_cond_init(pool->sig, NULL); + pool->conn = conn; + + pool->free = (pool_node*)calloc(nsizes, sizeof(*pool->free)); + pool->nsizes = nsizes; + + for(size_t i = 0; i < nsizes; ++i) + { + size_t size = max(sizes[i], MIN_SIZE); + pool->free[i].size = size; + + for(size_t n = 0; n < quantities[i]; ++n) + { + castle_buffer* node = NULL; + int ret = castle_shared_buffer_allocate(conn, &node, size); + if (ret) + { + castle_shared_pool_destroy(pool); + return -ENOMEM; + } + BUF_NEXT(node) = pool->free[i].head; + pool->free[i].head = node; + } + } + + qsort(pool->free, nsizes, sizeof(pool_node), node_cmp); + + *pool_out = pool; + return 0; +} + +int castle_shared_pool_destroy(castle_shared_pool* pool) +{ + if(!pool) + return 0; + + for(size_t i = 0; i < pool->nsizes; ++i) + { + while(pool->free[i].head) + { + castle_buffer* head = pool->free[i].head; + pool->free[i].head = BUF_NEXT(head); + castle_shared_buffer_release(pool->conn, head); + } + } + + free(pool->free); + + pthread_cond_destroy(pool->sig); + free(pool->sig); + + pthread_mutex_destroy(pool->lock); + free(pool->lock); + + free(pool); + return 0; +} + +static int dir_exists(const char* path) +{ + struct stat attr; + if(stat(path, &attr)) + return 0; + return S_ISDIR(attr.st_mode); +} + +static long filesize(FILE* file) +{ + if(!file) + return -EINVAL; + 
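    /*
     * Record the current offset, seek to the end to learn the length, then
     * restore the original position so the caller's stream state is unchanged.
     * For the sysfs attribute files read by the caller below, the value is
     * best treated as an upper bound on how many bytes fread() will return,
     * not an exact count.
     */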
long cur = ftell(file); + fseek(file, 0, SEEK_END); + long size = ftell(file); + fseek(file, cur, SEEK_SET); + return size; +} + +static int castle_collection_name_get(const char* coll_path, char** name) +{ + if(!name || *name) + return -EINVAL; + int ret = 0; + FILE* file = fopen(coll_path, "r"); + if(!file) + return -errno; + long size = filesize(file); + *name = (char*)calloc(1, size + 1); + if(!*name) + { + ret = -ENOMEM; + goto out1; + } + char* p = *name; + while(!feof(file) && p < *name + size) + p += fread(p, *name + size - p, 1, file); + + p = *name; + while(*p != '\n' && *p++); + *p = '\0'; + +out1: fclose(file); + return ret; +} + +static const char* collections_path = "/sys/fs/castle-fs/collections"; + +int castle_collection_find(const char* name, castle_collection* coll) +{ + if(!name || !*name || !coll) + return -EINVAL; + int ret = 0; + DIR* dir = opendir(collections_path); + if(!dir) + { + ret = -errno; + goto out1; + } + struct dirent* entry; + char* cur_name = NULL; + while((entry = readdir(dir))) + { + char path[PATH_MAX] = {0}; + snprintf(path, PATH_MAX, "%s/%s", collections_path, entry->d_name); + if(dir_exists(path)) + { + snprintf(path, PATH_MAX, "%s/%s/name", collections_path, entry->d_name); + castle_collection_name_get(path, &cur_name); + if(cur_name && 0==strcmp(cur_name, name)) + { + *coll = strtol(entry->d_name, NULL, 16); + goto out2; + } + free(cur_name); cur_name = NULL; + } + } + + ret = -ENOENT; + +out2: free(cur_name); +out1: closedir(dir); + return ret; +} diff --git a/list.h b/list.h new file mode 100644 index 0000000..bd7b6f1 --- /dev/null +++ b/list.h @@ -0,0 +1,979 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + xen_wmb(); + new->prev = prev; + prev->next = new; +} +#else +extern void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next); +#endif + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} +#else +extern void list_add(struct list_head *new, struct list_head *head); +#endif + + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. 
+ */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head * new, + struct list_head * prev, struct list_head * next) +{ + new->next = next; + new->prev = prev; + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + prev->next = next; +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty on entry does not return true after this, the entry is + * in an undefined state. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = NULL; + entry->prev = NULL; +} +#else +extern void list_del(struct list_head *entry); +#endif + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). 
+ * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = NULL; +} + +/** + * list_replace - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * Note: if 'old' was empty, it will be overwritten. + */ +static inline void list_replace(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +static inline void list_replace_init(struct list_head *old, + struct list_head *new) +{ + list_replace(old, new); + INIT_LIST_HEAD(old); +} + +/* + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The old entry will be replaced with the new entry atomically. + * Note: 'old' should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + new->next->prev = new; + new->prev->next = new; + old->prev = NULL; +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del(list->prev, list->next); + list_add_tail(list, head); +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, + const struct list_head *head) +{ + return list->next == head; +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * list_empty_careful - tests whether a list is empty and not being modified + * @head: the list to test + * + * Description: + * tests whether a list is empty _and_ checks that no other CPU might be + * in the process of modifying either member (next or prev) + * + * NOTE: using list_empty_careful() without synchronization + * can only be safe if the only activity that can happen + * to the list entry is list_del_init(). Eg. it cannot be used + * if another CPU could re-list_add() it. + */ +static inline int list_empty_careful(const struct list_head *head) +{ + struct list_head *next = head->next; + return (next == head) && (next == head->prev); +} + +/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. 
+ */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + +static inline void __list_splice(struct list_head *list, + struct list_head *head) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + first->prev = head; + head->next = first; + + last->next = at; + at->prev = last; +} + +/** + * list_splice - join two lists + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(struct list_head *list, struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync_func)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync_func(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. + */ + + last->next = at; + + head->next = first; + first->prev = head; + at->prev = last; +} + +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +/** + * Casts a member of a structure out to the containing structure + * @param ptr the pointer to the member. + * @param type the type of the container struct this is embedded in. + * @param member the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_struct within the struct. 
+ */
+#define list_entry(ptr, type, member) \
+        container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ */
+#define list_first_entry(ptr, type, member) \
+        list_entry((ptr)->next, type, member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+        for (pos = (head)->next; pos != (head); \
+                pos = pos->next)
+
+/**
+ * __list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ *
+ * This variant differs from list_for_each() in that it's the
+ * simplest possible list iteration code, no prefetching is done.
+ * Use this for code that knows the list to be very short (empty
+ * or 1 entry) most of the time.
+ */
+#define __list_for_each(pos, head) \
+        for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+        for (pos = (head)->prev; pos != (head); \
+                pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+        for (pos = (head)->next, n = pos->next; pos != (head); \
+                pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+        for (pos = list_entry((head)->next, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+        for (pos = list_entry((head)->prev, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue
+ * @pos: the type * to use as a start point
+ * @head: the head of the list
+ * @member: the name of the list_struct within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue.
+ */
+#define list_prepare_entry(pos, head, member) \
+        ((pos) ? : list_entry(head, typeof(*pos), member))
+
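/*
 * Illustrative sketch, not part of the original header: how an element type
 * embedding a struct list_head is typically queued and walked with the macros
 * above.  The struct and function names here are hypothetical; libcastle's own
 * use of this pattern is struct castle_front_callback in castle_private.h.
 */
#if 0
struct example_item {
    struct list_head list;
    int id;
};

static void example_usage(void)
{
    LIST_HEAD(items);                      /* empty list: head points at itself */
    struct example_item a = { .id = 1 }, b = { .id = 2 };
    struct example_item *pos, *tmp;

    list_add_tail(&a.list, &items);        /* FIFO order: a first, then b */
    list_add_tail(&b.list, &items);

    /* The _safe variant allows list_del() on the current entry mid-walk. */
    list_for_each_entry_safe(pos, tmp, &items, list) {
        if (pos->id == 1)
            list_del(&pos->list);
    }
}
#endif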
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+        for (pos = list_entry(pos->member.next, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue_reverse - iterate backwards from the given point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Start to iterate over list of given type backwards, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member) \
+        for (pos = list_entry(pos->member.prev, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) \
+        for (; &pos->member != (head); \
+                pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+        for (pos = list_entry((head)->next, typeof(*pos), member), \
+                n = list_entry(pos->member.next, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_continue
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) \
+        for (pos = list_entry(pos->member.next, typeof(*pos), member), \
+                n = list_entry(pos->member.next, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_from
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+        for (n = list_entry(pos->member.next, typeof(*pos), member); \
+                &pos->member != (head); \
+                pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+/**
+ * list_for_each_entry_safe_reverse
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_entry((head)->prev, typeof(*pos), member), \ + n = list_entry(pos->member.prev, typeof(*pos), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.prev, typeof(*n), member)) + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + (pos) != (head); \ + (pos) = (pos)->next) + +/* + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is + * too wasteful. + * You lose the ability to access the tail in O(1). 
+ */ + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +static inline int hlist_unhashed(const struct hlist_node *h) +{ + return !h->pprev; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = NULL; + n->pprev = NULL; +} + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = NULL; +} + +static inline void hlist_del_init(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + INIT_HLIST_NODE(n); + } +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). 
+ */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + + if (first) + first->pprev = &n->next; + h->first = n; +} + +/* next must be != NULL */ +static inline void hlist_add_before(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after(struct hlist_node *n, + struct hlist_node *next) +{ + next->next = n->next; + n->next = next; + next->pprev = &n->next; + + if(next->next) + next->next->pprev = &next->next; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos; \ + pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ + pos = n) + +/** + * hlist_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. 
+ */ +#define hlist_for_each_entry_continue(tpos, pos, member) \ + for (pos = (pos)->next; \ + pos && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(tpos, pos, member) \ + for (; pos && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/ring.h b/ring.h new file mode 100644 index 0000000..a668f8a --- /dev/null +++ b/ring.h @@ -0,0 +1,343 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. 
+ */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +//#include "../xen-compat.h" + +//#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#ifdef KERNEL +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#else +#if defined(__i386__) +#define xen_mb() asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define xen_rmb() asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define xen_wmb() asm volatile ( "" : : : "memory") +#elif defined(__x86_64__) +#define xen_mb() asm volatile ( "mfence" : : : "memory") +#define xen_rmb() asm volatile ( "lfence" : : : "memory") +#define xen_wmb() asm volatile ( "" : : : "memory") +#elif defined(__ia64__) +#define xen_mb() asm volatile ("mf" ::: "memory") +#define xen_rmb() asm volatile ("mf" ::: "memory") +#define xen_wmb() asm volatile ("mf" ::: "memory") +#else +#error "Define barriers" +#endif +#endif +//#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). 
To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + unsigned int reserved; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. + */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size, __reserved) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->reserved = __reserved; \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* Initialize to existing shared indexes -- for recovery */ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). 
*/ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. 
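+ *
+ * Illustrative producer-side sketch (not part of the original comment);
+ * notify_back() stands in for whatever wakeup mechanism the transport
+ * provides, and "front" is a mytag_front_ring_t as described earlier:
+ *
+ *    int notify;
+ *    mytag_request_t *req = RING_GET_REQUEST(&front, front.req_prod_pvt);
+ *    ... fill in *req ...
+ *    front.req_prod_pvt++;
+ *    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front, notify);
+ *    if (notify)
+ *        notify_back();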
+ */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/versions b/versions new file mode 100644 index 0000000..7f19116 --- /dev/null +++ b/versions @@ -0,0 +1,78 @@ +CASTLE_1 { + global: + /* Data path */ + castle_connect; + castle_disconnect; + castle_free; + castle_request_do_blocking; + castle_request_do_blocking_multi; + castle_request_send; + castle_shared_buffer_create; + castle_shared_buffer_destroy; + castle_shared_buffer_allocate; + castle_shared_buffer_release; + castle_get; + castle_replace; + castle_remove; + castle_iter_start; + castle_iter_next; + castle_iter_finish; + castle_kvs_free; + castle_getslice; + castle_big_put; + castle_put_chunk; + castle_big_get; + castle_get_chunk; + + castle_build_key; + castle_build_key_len; + castle_key_bytes_needed; + castle_malloc_key; + + castle_print_key; + castle_print_request; + castle_print_response; + + /* Control path */ + castle_attach; + castle_attach_dev; + castle_claim; + castle_claim_dev; + castle_clone; + castle_collection_attach; + castle_collection_detach; + castle_collection_snapshot; + castle_environment_set; + castle_trace_setup; + castle_trace_start; + castle_trace_stop; + castle_trace_teardown; + castle_fault; + castle_slave_evacuate; + castle_slave_scan; + castle_thread_priority; + castle_create; + castle_detach; + castle_detach_dev; + castle_init; + castle_release; + castle_snapshot; + castle_snapshot_dev; + castle_destroy; + + castle_device_to_devno; + castle_devno_to_device; + + castle_max_buffer_size; + + /* Shared buffer pool */ + castle_shared_pool_create; + castle_shared_pool_destroy; + castle_shared_pool_lease; + castle_shared_pool_release; + + /* Collection utils */ + castle_collection_find; + + local: *; +};
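
For reference, a minimal sketch (illustrative only, not taken from the castle sources) of how the ring.h macros above fit together on the front, request-producing side. mytag_request_t, mytag_response_t, shared_page, page_size and the empty notify branch are placeholders; note that this tree's FRONT_RING_INIT takes a fourth "reserved" argument, unlike the three-argument call shown in the header's own comment.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "ring.h"

/* Placeholder request/response types, for illustration only. */
typedef struct { uint64_t id; } mytag_request_t;
typedef struct { uint64_t id; int err; } mytag_response_t;

DEFINE_RING_TYPES(mytag, mytag_request_t, mytag_response_t);

/* Initialise the shared page and front ring, then queue one request. */
void mytag_front_example(void *shared_page, size_t page_size)
{
    mytag_sring_t *sring = shared_page;
    mytag_front_ring_t front;
    mytag_request_t *req;
    int notify;

    SHARED_RING_INIT(sring);
    /* Four arguments: the last one fills the front ring's "reserved" field. */
    FRONT_RING_INIT(&front, sring, page_size, 0);

    if (RING_FULL(&front))
        return;             /* a real caller would wait for free slots */

    req = RING_GET_REQUEST(&front, front.req_prod_pvt);
    req->id = 1;
    front.req_prod_pvt++;

    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front, notify);
    if (notify) {
        /* Placeholder: wake the back end via the transport's mechanism. */
    }
}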