diff --git a/kernel/loader-ll.c b/kernel/loader-ll.c
new file mode 100644
index 0000000000000000000000000000000000000000..9de1d5499ee2c3e99fc28ad27381e1377cbf2288
--- /dev/null
+++ b/kernel/loader-ll.c
@@ -0,0 +1,143 @@
+/*
+ * This is the low-level engine of firmware loading. It is meant
+ * to be compiled both as kernel code and user code, using the associated
+ * header to differentiate between the two.
+ */
+
+#define __LOADER_LL_C__ /* Gates lll_* in loader-ll.h; callers won't define it */
+
+#include "spec.h"
+#include "loader-ll.h"
+
+/* GPIO bits (on the GN4124) driven to choose the FPGA configuration mode */
+#define GPIO_BOOTSEL0 15
+#define GPIO_BOOTSEL1 14
+
+static inline uint8_t reverse_bits8(uint8_t x)
+{
+	/* Mirror the bit order of x: swap adjacent bits, then pairs, then nibbles */
+	x = (uint8_t)(((x & 0xaa) >> 1) | ((x & 0x55) << 1));
+	x = (uint8_t)(((x & 0xcc) >> 2) | ((x & 0x33) << 2));
+
+	return (uint8_t)((x >> 4) | (x << 4));
+}
+
+static uint32_t unaligned_bitswap_le32(const uint32_t *ptr32)
+{
+	/*
+	 * Bit-reverse each of the 4 bytes at ptr32 (which may be unaligned).
+	 * Temporaries are automatic, not static, so the function is reentrant.
+	 */
+	const uint8_t *src = (const uint8_t *) ptr32;
+	uint32_t tmp32;
+	uint8_t *dst = (uint8_t *) &tmp32;
+	int i;
+
+	for (i = 0; i < 4; i++)
+		dst[i] = reverse_bits8(src[i]);
+	return tmp32;
+}
+
+static inline void gpio_out(int fd, void __iomem *bar4, const uint32_t addr, const int bit, const int value)
+{
+	/*
+	 * Read-modify-write of a single bit in the register at addr:
+	 * a non-zero value sets the bit, zero clears it.
+	 */
+	const uint32_t mask = 1 << bit;
+	uint32_t cur = lll_read(fd, bar4, addr);
+
+	cur = value ? (cur | mask) : (cur & ~mask);
+
+	lll_write(fd, bar4, cur, addr);
+}
+
+/*
+ * Unfortunately, most of the following is from fcl_gn4124.cpp, for which
+ * the license terms are at best ambiguous.
+ */
+
+/*
+ * Load size8 bytes of data into the FPGA. Returns the number of 32-bit words
+ * written, or -EIO. A partial last word is zero-padded, not read past data.
+ */
+int loader_low_level(int fd, void __iomem *bar4, const void *data, int size8)
+{
+	int size32 = (size8 + 3) >> 2;
+	const uint32_t *data32 = data, *src;
+	int ctrl = 0, i, done = 0, wrote = 0;
+	int tail8 = size8 > 0 ? (size8 & 3) : 0; /* bytes in a partial last word */
+	uint32_t last32 = 0;
+
+	/* Copy the partial last word so the FIFO loop never reads past data */
+	for (i = 0; i < tail8; i++)
+		((uint8_t *) &last32)[i] = ((const uint8_t *) data)[size8 - tail8 + i];
+
+	/* configure Gennum GPIO to select GN4124->FPGA configuration mode */
+	gpio_out(fd, bar4, GNGPIO_DIRECTION_MODE, GPIO_BOOTSEL0, 0);
+	gpio_out(fd, bar4, GNGPIO_DIRECTION_MODE, GPIO_BOOTSEL1, 0);
+	gpio_out(fd, bar4, GNGPIO_OUTPUT_ENABLE, GPIO_BOOTSEL0, 1);
+	gpio_out(fd, bar4, GNGPIO_OUTPUT_ENABLE, GPIO_BOOTSEL1, 1);
+	gpio_out(fd, bar4, GNGPIO_OUTPUT_VALUE, GPIO_BOOTSEL0, 1);
+	gpio_out(fd, bar4, GNGPIO_OUTPUT_VALUE, GPIO_BOOTSEL1, 0);
+
+	lll_write(fd, bar4, 0x00, FCL_CLK_DIV);
+	lll_write(fd, bar4, 0x40, FCL_CTRL); /* Reset */
+	i = lll_read(fd, bar4, FCL_CTRL);
+	if (i != 0x40) {
+		printk(KERN_ERR "%s: %i: error\n", __func__, __LINE__);
+		return -EIO;
+	}
+	lll_write(fd, bar4, 0x00, FCL_CTRL);
+	lll_write(fd, bar4, 0x00, FCL_IRQ); /* clear pending irq */
+
+	switch(size8 & 3) {
+	case 3: ctrl = 0x116; break;
+	case 2: ctrl = 0x126; break;
+	case 1: ctrl = 0x136; break;
+	case 0: ctrl = 0x106; break;
+	}
+	lll_write(fd, bar4, ctrl, FCL_CTRL);
+	lll_write(fd, bar4, 0x00, FCL_CLK_DIV); /* again? maybe 1 or 2? */
+	lll_write(fd, bar4, 0x00, FCL_TIMER_CTRL); /* "disable FCL timr fun" */
+	lll_write(fd, bar4, 0x10, FCL_TIMER_0); /* "pulse width" */
+	lll_write(fd, bar4, 0x00, FCL_TIMER_1);
+
+	/*
+	 * Delay before FCL applies data and clock after SPRI_STATUS is asserted.
+	 */
+	lll_write(fd, bar4, 0x08, FCL_TIMER2_0); /* "delay before data/clk" */
+	lll_write(fd, bar4, 0x00, FCL_TIMER2_1);
+	lll_write(fd, bar4, 0x17, FCL_EN); /* "output enable" */
+	ctrl |= 0x01; /* "start FSM configuration" */
+	lll_write(fd, bar4, ctrl, FCL_CTRL);
+
+	while (size32 > 0) {
+		/* Check whether FPGA configuration reported "done" or an error */
+		i = lll_read(fd, bar4, FCL_IRQ);
+		if ( (i & 8) && wrote) {
+			done = 1;
+			printk("%s: %i: done after %i\n", __func__, __LINE__,
+				wrote);
+		} else if ( (i & 0x4) && !done) {
+			printk(KERN_ERR "%s: %i: error after %i\n", __func__,
+				__LINE__, wrote);
+			return -EIO;
+		}
+
+		/* Wait until at least 1/2 of the fifo is empty */
+		while (lll_read(fd, bar4, FCL_IRQ)  & (1<<5))
+			;
+
+		/* Write a few dwords into FIFO at a time (padded copy for a partial tail) */
+		for (i = 0; size32 && i < 32; i++) {
+			src = (size32 == 1 && tail8) ? &last32 : data32;
+			lll_write(fd, bar4, unaligned_bitswap_le32(src), FCL_FIFO);
+			data32++; size32--; wrote++;
+		}
+	}
+
+	lll_write(fd, bar4, 0x186, FCL_CTRL); /* "last data written" */
+	/* Checking for the "interrupt" condition is left to the caller */
+	return wrote;
+}
diff --git a/kernel/loader-ll.h b/kernel/loader-ll.h
new file mode 100644
index 0000000000000000000000000000000000000000..c728ca61f5ec84e6c8c7439fa1fac6e1edde6e50
--- /dev/null
+++ b/kernel/loader-ll.h
@@ -0,0 +1,70 @@
+/*
+ * This header differentiates between kernel-mode and user-mode compilation,
+ * as loader-ll.c is meant to be used in both contexts.
+ */
+
+#ifndef __iomem
+#define __iomem /* nothing, for user space */
+#endif
+
+extern int loader_low_level(
+	int fd,			/* This is ignored in kernel space */
+	void __iomem *bar4,	/* This is ignored in user space */
+	const void *data,	/* Data to be written to the FCL FIFO */
+	int size8);		/* Length of data, in bytes */
+
+
+/* The following part implements a different access rule for user and kernel */
+#ifdef __LOADER_LL_C__
+
+#ifdef __KERNEL__
+
+#include <asm/io.h>
+//#include <linux/kernel.h> /* for printk */
+
+static inline void lll_write(int fd, void __iomem *bar4, u32 val, int reg)
+{
+	writel(val, bar4 + reg); /* reg is a byte offset in bar4; fd is unused */
+}
+
+static inline u32 lll_read(int fd, void __iomem *bar4, int reg)
+{
+	return readl(bar4 + reg); /* reg is a byte offset in bar4; fd is unused */
+}
+
+#else /* ! __KERNEL__ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+
+static inline void lll_write(int fd, void __iomem *bar4, uint32_t val, int reg)
+{
+	/* bar4 is unused: the write goes through the RR_WRITE ioctl on fd */
+	struct rr_iocmd cmd = { .datasize = 4, .address = reg | __RR_SET_BAR(4) };
+	int ret;
+
+	cmd.data32 = val;
+	ret = ioctl(fd, RR_WRITE, &cmd);
+	if (ret < 0)
+		perror("ioctl");
+}
+
+static inline uint32_t lll_read(int fd, void __iomem *bar4, int reg)
+{
+	/* bar4 is unused: the read goes through the RR_READ ioctl on fd */
+	struct rr_iocmd cmd = { .datasize = 4, .address = reg | __RR_SET_BAR(4) };
+
+	if (ioctl(fd, RR_READ, &cmd) < 0)
+		perror("ioctl");	/* reported only; cmd.data32 is still returned */
+
+	return cmd.data32;
+}
+
+#define KERN_ERR /* nothing: printk below is plain fprintf to stderr */
+#define printk(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+#endif
+
+#endif /* __LOADER_LL_C__ */