diff --git a/api/tools/eb-ls.c b/api/tools/eb-ls.c
index 4b9b9d4e4698d00608477ce03050fa289d55ccee..3657917f11dcec8c0137aae5b69aeae37a788cf2 100644
--- a/api/tools/eb-ls.c
+++ b/api/tools/eb-ls.c
@@ -121,7 +121,6 @@ int main(int argc, const char** argv) {
   
   stop = 0;
   while (!stop) {
-    eb_device_flush(device);
     eb_socket_block(socket, -1);
     eb_socket_poll(socket);
   }
diff --git a/api/tools/eb-write.c b/api/tools/eb-write.c
index 0e3b1289cadea29a40598afdc4623c149239c148..caccbfd4108a7df3cc6ec6b6510b6efe2ac7882b 100644
--- a/api/tools/eb-write.c
+++ b/api/tools/eb-write.c
@@ -1,5 +1,5 @@
 /** @file eb-write.c
- *  @brief A demonstration program which executes an Etherbone write.
+ *  @brief A tool for executing Etherbone writes.
  *
  *  Copyright (C) 2011-2012 GSI Helmholtz Centre for Heavy Ion Research GmbH 
  *
@@ -25,77 +25,512 @@
  *******************************************************************************
  */
 
-#define _POSIX_C_SOURCE 200112L /* strtoull */
+#define _POSIX_C_SOURCE 200112L /* strtoull + getopt */
 
+#include <unistd.h> /* getopt */
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
 #include "../etherbone.h"
 
+static const char* endian_str[4] = {
+ /*  0 */ "auto-endian",
+ /*  1 */ "big-endian",
+ /*  2 */ "little-endian",
+ /*  3 */ "invalid-endian"
+};
+
+static const char* width_str[16] = {
+ /*  0 */ "<null>",
+ /*  1 */ "8",
+ /*  2 */ "16",
+ /*  3 */ "8/16",
+ /*  4 */ "32",
+ /*  5 */ "8/32",
+ /*  6 */ "16/32",
+ /*  7 */ "8/16/32",
+ /*  8 */ "64",
+ /*  9 */ "8/64",
+ /* 10 */ "16/64",
+ /* 11 */ "8/16/64",
+ /* 12 */ "32/64",
+ /* 13 */ "8/32/64",
+ /* 14 */ "16/32/64",
+ /* 15 */ "8/16/32/64"
+};
+
+static int parse_width(char* str) {
+  int width, widths;
+  char* next;
+  
+  widths = 0;
+  while (1) {
+    width = strtol(str, &next, 0);
+    if (width != 8 && width != 16 && width != 32 && width != 64) break;
+    widths |= width/8;
+    if (!*next) return widths;
+    if (*next != '/' && *next != ',') break;
+    str = next+1;
+  }
+  
+  return -1;
+}
+
+/* Command-line options */
+static eb_width_t address_width, data_width;
+static eb_format_t size, endian;
+static int verbose, quiet, attempts, probe, fidelity, error;
+static const char* netaddress;
+static const char* program;
+static eb_address_t address;
+static eb_data_t data;
+
+static void help(void) {
+  char revision[20] = "$Rev::            $";
+  char date[40]     = "$Date::                               $";
+  
+  *strchr(revision+6, ' ') = 0;
+  *strchr(date+7,     ' ') = 0;
+  
+  fprintf(stderr, "Usage: %s [OPTION] <proto/host/port> <address/size> <value>\n", program);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "  -a <width>     acceptable address bus widths     (8/16/32/64)\n");
+  fprintf(stderr, "  -d <width>     acceptable data bus widths        (8/16/32/64)\n");
+  fprintf(stderr, "  -b             big-endian operation                    (auto)\n");
+  fprintf(stderr, "  -l             little-endian operation                 (auto)\n");
+  fprintf(stderr, "  -r <retries>   number of times to attempt autonegotiation (3)\n");
+  fprintf(stderr, "  -f             fidelity: do not fragment or read-before-write\n");
+  fprintf(stderr, "  -p             disable self-describing wishbone device probe\n");
+  fprintf(stderr, "  -v             verbose operation\n");
+  fprintf(stderr, "  -q             quiet: do not display warnings\n");
+  fprintf(stderr, "  -h             display this help and exit\n");
+  fprintf(stderr, "\n");
+  fprintf(stderr, "Report Etherbone bugs to <etherbone-core@ohwr.org>\n");
+  fprintf(stderr, "Version r%s (%s). Licensed under the LGPL v3.\n", revision+6, date+7);
+}
+
+static void find_device(eb_user_data_t data, sdwb_t sdwb, eb_status_t status) {
+  int i, devices;
+  eb_format_t size, dev_endian;
+  eb_format_t* device_support;
+  sdwb_device_descriptor_t des;
+  
+  device_support = (eb_format_t*)data;
+  
+  if (status != EB_OK) {
+    fprintf(stderr, "%s: failed to retrieve SDWB data: %s\n", program, eb_status(status));
+    exit(1);
+  }
+  
+  des = 0; /* silence warning */
+  devices = sdwb->header.wbddb_size / 80;
+  for (i = 0; i < devices; ++i) {
+    des = &sdwb->device_descriptor[i];
+    if ((des->wbd_flags & WBD_FLAG_PRESENT) == 0) continue;
+    
+    if (des->hdl_base <= address && address - des->hdl_base <= des->hdl_size) break;
+  }
+  
+  if (i == devices) {
+    if (!quiet)
+      fprintf(stderr, "%s: warning: could not locate Wishbone device at address %016"EB_ADDR_FMT"\n", 
+                      program, address);
+    *device_support = endian | EB_DATAX;
+  } else {
+    if ((des->wbd_flags & WBD_FLAG_LITTLE_ENDIAN) != 0)
+      dev_endian = EB_LITTLE_ENDIAN;
+    else
+      dev_endian = EB_BIG_ENDIAN;
+    
+    size = des->wbd_width & EB_DATAX;
+    
+    if (verbose)
+      fprintf(stdout, "  discovered Wishbone device at address %016"EB_ADDR_FMT" with %s %s-bit granularity\n",
+                      (eb_address_t)des->hdl_base, endian_str[dev_endian >> 4], width_str[size]);
+    
+    *device_support = dev_endian | size;
+  }
+}
+
 static void set_stop(eb_user_data_t user, eb_operation_t op, eb_status_t status) {
   int* stop = (int*)user;
   *stop = 1;
   
   if (status != EB_OK) {
-    fprintf(stdout, "%s\n", eb_status(status));
+    fprintf(stderr, "%s: etherbone cycle error: %s\n", 
+                    program, eb_status(status));
   } else {
-    if (eb_operation_had_error(op))
-      fprintf(stdout, " <<-- wishbone segfault -->>\n");
-    else
-      fprintf(stdout, "done\n");
+    for (; op != EB_NULL; op = eb_operation_next(op)) {
+      if (eb_operation_had_error(op))
+        fprintf(stderr, "%s: wishbone segfault %s %s %s bits to address %016"EB_ADDR_FMT"\n",
+                        program, eb_operation_is_read(op)?"reading":"writing",
+                        width_str[eb_operation_format(op) & EB_DATAX], 
+                        endian_str[eb_operation_format(op) >> 4], eb_operation_address(op));
+    }
   }
 }
 
-int main(int argc, const char** argv) {
+int main(int argc, char** argv) {
+  long value;
+  char* value_end;
+  int stop, opt;
+  
+  eb_data_t mask;
   eb_socket_t socket;
   eb_status_t status;
   eb_device_t device;
-  eb_width_t width;
+  eb_width_t line_width;
+  eb_format_t line_widths;
+  eb_format_t device_support;
+  eb_format_t write_sizes;
   eb_format_t format;
   eb_cycle_t cycle;
-  eb_address_t address;
-  eb_data_t data;
-  const char* netaddress;
-  int stop;
   
-  if (argc < 4 || argc > 5) {
-    fprintf(stderr, "Syntax: %s <protocol/host/port> <address> <data> [width]\n", argv[0]);
+  /* Default arguments */
+  program = argv[0];
+  address_width = EB_ADDRX;
+  data_width = EB_DATAX;
+  size = EB_DATAX;
+  endian = 0; /* auto-detect */
+  attempts = 3;
+  probe = 1;
+  fidelity = 0;
+  quiet = 0;
+  verbose = 0;
+  error = 0;
+  
+  /* Process the command-line arguments */
+  while ((opt = getopt(argc, argv, "a:d:blr:fpvqh")) != -1) {
+    switch (opt) {
+    case 'a':
+      value = parse_width(optarg);
+      if (value < 0) {
+        fprintf(stderr, "%s: invalid address width -- '%s'\n", program, optarg);
+        return 1;
+      }
+      address_width = value << 4;
+      break;
+    case 'd':
+      value = parse_width(optarg);
+      if (value < 0) {
+        fprintf(stderr, "%s: invalid data width -- '%s'\n", program, optarg);
+        return 1;
+      }
+      data_width = value;
+      break;
+    case 'b':
+      endian = EB_BIG_ENDIAN;
+      break;
+    case 'l':
+      endian = EB_LITTLE_ENDIAN;
+      break;
+    case 'r':
+      value = strtol(optarg, &value_end, 0);
+      if (*value_end || value < 0 || value > 100) {
+        fprintf(stderr, "%s: invalid number of retries -- '%s'\n", program, optarg);
+        return 1;
+      }
+      attempts = value;
+      break;
+    case 'f':
+      fidelity = 1;
+      break;
+    case 'p':
+      probe = 0;
+      break;
+    case 'v':
+      verbose = 1;
+      break;
+    case 'q':
+      quiet = 1;
+      break;
+    case 'h':
+      help();
+      return 1;
+    case ':':
+    case '?':
+      error = 1;
+      break;
+    default:
+      fprintf(stderr, "%s: bad getopt result\n", program);
+      return 1;
+    }
+  }
+  
+  if (error) return 1;
+  
+  if (optind + 3 != argc) {
+    fprintf(stderr, "%s: expecting three non-optional arguments: <proto/host/port> <address/size> <value>\n", program);
     return 1;
   }
   
-  netaddress = argv[1];
-  address = strtoull(argv[2], 0, 0);
-  data = strtoull(argv[3], 0, 0);
+  netaddress = argv[optind];
   
-  if (argc == 5)
-    format = strtoul(argv[4], 0, 0);
+  address = strtoull(argv[optind+1], &value_end, 0);
+  if (*value_end == '/')
+    size = strtoull(value_end+1, &value_end, 0);
   else
-    format = EB_DATAX;
+    size = 0;
+  if (*value_end != 0 || (size != 1 && size != 2 && size != 4 && size != 8)) {
+    fprintf(stderr, "%s: argument does not match format <address>/<1|2|4|8> -- '%s'\n",
+                    program, argv[optind+1]);
+    return 1;
+  }
   
-  if ((status = eb_socket_open(EB_ABI_CODE, 0, EB_DATAX|EB_ADDRX, &socket)) != EB_OK) {
-    fprintf(stderr, "Failed to open Etherbone socket: %s\n", eb_status(status));
+  if ((address & (size-1)) != 0) {
+    fprintf(stderr, "%s: %016"EB_ADDR_FMT" is not aligned to a %d byte boundary\n", 
+                    program, address, size);
     return 1;
   }
   
-  if ((status = eb_device_open(socket, netaddress, EB_ADDRX|EB_DATAX, 3, &device)) != EB_OK) {
-    fprintf(stderr, "Failed to open Etherbone device: %s\n", eb_status(status));
+  /* How big can the data be? */
+  mask = ~(eb_data_t)0;
+  mask >>= (sizeof(eb_data_t)-size)*8;
+  
+  data = strtoull(argv[optind+2], &value_end, 0);
+  if (*value_end != 0) {
+    fprintf(stderr, "%s: argument is not an unsigned value -- '%s'\n", 
+                    program, argv[optind+2]);
+    return 1;
+  }
+  if ((data & mask) != data) {
+    fprintf(stderr, "%s: %016"EB_DATA_FMT" cannot be represented in %d bytes\n", 
+                    program, data, size);
     return 1;
   }
   
-  width = eb_device_width(device);
-  fprintf(stdout, "Connected to %s with %d/%d-bit address/port widths\n\n", netaddress, (width >> 4) * 8, (width & EB_DATAX) * 8);
+  if (verbose)
+    fprintf(stdout, "Opening Etherbone socket supporting %s-bit address and %s-bit data widths\n", 
+                    width_str[address_width>>4], width_str[data_width]);
+  
+  if ((status = eb_socket_open(EB_ABI_CODE, 0, address_width|data_width, &socket)) != EB_OK) {
+    fprintf(stderr, "%s: failed to open Etherbone socket: %s\n", program, eb_status(status));
+    return 1;
+  }
+  
+  if (verbose)
+    fprintf(stdout, "Connecting to '%s' with %d retry attempts...\n", netaddress, attempts);
+  
+  if ((status = eb_device_open(socket, netaddress, EB_ADDRX|EB_DATAX, attempts, &device)) != EB_OK) {
+    fprintf(stderr, "%s: failed to open Etherbone device: %s\n", program, eb_status(status));
+    return 1;
+  }
+  
+  line_width = eb_device_width(device);
+  if (verbose)
+    fprintf(stdout, "  negotiated %s-bit address and %s-bit data session.\n", 
+                    width_str[line_width >> 4], width_str[line_width & EB_DATAX]);
+  
+  if (probe) {
+    if (verbose)
+      fprintf(stdout, "Scanning remote bus for Wishbone devices...\n");
+    device_support = 0;
+    if ((status = eb_sdwb_scan(device, &device_support, &find_device)) != EB_OK) {
+      fprintf(stderr, "%s: failed to scan remote bus: %s\n", program, eb_status(status));
+    }
+    while (device_support == 0) {
+      eb_socket_block(socket, -1);
+      eb_socket_poll(socket);
+    }
+  } else {
+    device_support = endian | EB_DATAX;
+  }
+  
+  /* Did the user request an endian that differs from the device's? We use it anyway, but issue a warning. */
+  if (endian != 0 && (device_support & EB_ENDIAN_MASK) != endian) {
+    if (!quiet)
+      fprintf(stderr, "%s: warning: target device is %s (writing as %s).\n",
+                      program, endian_str[device_support >> 4], endian_str[endian >> 4]);
+  }
+  
+  if (endian == 0) {
+    /* Select the probed endian. May still be 0 if device not found. */
+    endian = device_support & EB_ENDIAN_MASK;
+  }
+  
+  /* Final operation endian has been chosen. If 0 the access had better be a full data width access! */
+  format = endian;
   
-  fprintf(stdout, "Writing at %016"EB_ADDR_FMT": %016"EB_DATA_FMT": ", address, data);
-  fflush(stdout);
+  /* We need to pick the operation width we use.
+   * It must be supported both by the device and the line.
+   */
+  line_widths = ((line_width & EB_DATAX) << 1) - 1; /* Link can support any access no wider than line_width */
+  write_sizes = line_widths & device_support;
+    
+  /* We cannot work with a device that requires larger access than we support */
+  if (write_sizes == 0) {
+    fprintf(stderr, "%s: error: device's %s-bit data port cannot be used via a %s-bit wire format\n",
+                    program, width_str[device_support & EB_DATAX], width_str[line_width & EB_DATAX]);
+    return 1;
+  }
   
+  /* Begin the cycle */
   if ((cycle = eb_cycle_open(device, &stop, &set_stop)) == EB_NULL) {
-    fprintf(stdout, "out of memory\n");
+    fprintf(stderr, "%s: failed to create cycle: out of memory\n", program);
     return 1;
   }
   
-  eb_cycle_write(cycle, address, format, data);
-  eb_cycle_close(cycle);
+  /* Can the operation be performed with fidelity? */
+  if ((size & write_sizes) == 0) {
+    eb_format_t fragment_sizes;
+    eb_format_t fragment_size;
+    eb_format_t complete_size;
+    
+    /* We are about to screw with their operation to get it to work... */
+    if (fidelity) {
+      if ((size & line_widths) == 0)
+        fprintf(stderr, "%s: error: cannot perform a %s-bit write through a %s-bit connection\n",
+                        program, width_str[size], width_str[line_widths & EB_DATAX]);
+      else
+        fprintf(stderr, "%s: error: cannot perform a %s-bit write to a %s-bit device\n",
+                        program, width_str[size], width_str[device_support & EB_DATAX]);
+      return 1;
+    }
+    
+    /* What will we do? Prefer to fragment if possible; reading is evil. */
+    
+    /* Fragmented writing is possible if there is a bit in write_sizes smaller than a bit in size */
+    fragment_sizes = size;
+    fragment_sizes |= fragment_sizes >> 1;
+    fragment_sizes |= fragment_sizes >> 2; /* Filled in all sizes under max */
+    if ((fragment_sizes & write_sizes) != 0) {
+      int stride, chunk, count;
+      eb_data_t partial_data;
+      
+      /* We can do a fragmented write. Pick largest write possible. */
+      complete_size = fragment_sizes ^ (fragment_sizes >> 1); /* This many bytes to write */
+      /* (the above code sets complete_size = size, but works also if size were a mask) */
+      
+      /* Filter out only those which have a good write size */
+      fragment_sizes &= write_sizes;
+      /* Then pick the largest bit */
+      fragment_sizes |= fragment_sizes >> 1;
+      fragment_sizes |= fragment_sizes >> 2;
+      fragment_size = fragment_sizes ^ (fragment_sizes >> 1);
+      
+      /* We write fragments */
+      format |= fragment_size;
+      
+      if (!quiet)
+        fprintf(stderr, "%s: warning: fragmenting %s-bit write into %s-bit operations\n",
+                        program, width_str[complete_size], width_str[format & EB_DATAX]);
+      
+      /* Each operation writes this many bytes */
+      chunk = format & EB_DATAX;
+      count = complete_size / chunk;
+      
+      /* Write the low bits first */
+      switch (format & EB_ENDIAN_MASK) {
+      case EB_BIG_ENDIAN:
+        address += chunk*(count-1);
+        stride = -chunk;
+        break;
+      case EB_LITTLE_ENDIAN:
+        stride = chunk;
+        break;
+      default:
+        fprintf(stderr, "%s: error: must know endian to fragment write\n",
+                        program);
+        return 1;
+      }
+      
+      for (; count > 0; --count) {
+        partial_data = ~(eb_data_t)0;
+        partial_data >>= (sizeof(eb_data_t)-chunk)*8;
+        partial_data &= data;
+        
+        if (verbose)
+          fprintf(stdout, "Writing %016"EB_DATA_FMT" to %016"EB_ADDR_FMT"/%d\n",
+                          partial_data, address, format & EB_DATAX);
+        
+        eb_cycle_write(cycle, address, format, partial_data);
+        data >>= chunk*8;
+        address += stride;
+      }
+    } else {
+      eb_data_t original_data;
+      eb_address_t aligned_address;
+      int shift;
+      
+      /* All bits in write_sizes are larger than all bits in size */
+      /* We will need to do a larger operation than the write requested. */
+      
+      /* The fragment to inject is the largest requested size. */
+      fragment_size = fragment_sizes ^ (fragment_sizes >> 1);
+      /* (the above code sets fragment_size = size, but works also if size were a mask) */
+      
+      /* Now pick the smallest bit in write_sizes. */
+      complete_size = write_sizes & -write_sizes;
+      
+      /* We have our final operation format. */
+      format |= complete_size;
+      
+      if (!quiet)
+        fprintf(stderr, "%s: warning: reading %s bits to write a %s bit fragment\n",
+                        program, width_str[complete_size], width_str[fragment_size]);
+      
+      /* Align the address */
+      aligned_address = address & ~(eb_address_t)(complete_size-1);
+      
+      /* How far do we need to shift the offset? */
+      switch (format & EB_ENDIAN_MASK) {
+      case EB_BIG_ENDIAN:
+        shift = (complete_size-fragment_size) - (address - aligned_address);
+        break;
+      case EB_LITTLE_ENDIAN:
+        shift = (address - aligned_address);
+        break;
+      default:
+        fprintf(stderr, "%s: error: must know endian to fill a partial write\n",
+                        program);
+        return 1;
+      }
+      mask <<= shift*8;
+      data <<= shift*8;
+      
+      /* Issue the read */
+      eb_cycle_read(cycle, aligned_address, format, &original_data);
+      if (verbose)
+        fprintf(stdout, "Reading %016"EB_ADDR_FMT"/%d\n",
+                        aligned_address, format & EB_DATAX);
+      eb_cycle_close(cycle);
+      stop = 0;
+      eb_device_flush(device);
+      while (!stop) {
+        eb_socket_block(socket, -1);
+        eb_socket_poll(socket);
+      }
+      
+      /* Restart the cycle */
+      cycle = eb_cycle_open(device, &stop, &set_stop);
+      
+      /* Inject the data */
+      data |= original_data & ~mask;
+      eb_cycle_write(cycle, aligned_address, format, data);
+      
+      if (verbose)
+        fprintf(stdout, "Writing %016"EB_DATA_FMT" to %016"EB_ADDR_FMT"/%d\n",
+                        data, aligned_address, format & EB_DATAX);
+    }
+  } else {
+    /* There is a size requested that the device and link supports */
+    format |= (size & write_sizes);
+    if (verbose)
+      fprintf(stdout, "Writing %016"EB_DATA_FMT" to %016"EB_ADDR_FMT"/%d\n",
+                      data, address, format & EB_DATAX);
+    eb_cycle_write(cycle, address, format, data);
+  }
+  
+  /* If the access is not full width, an endian is needed. Print a friendlier message than EB_ADDRESS. */
+  if ((format & line_width & EB_DATAX) == 0 && (format & EB_ENDIAN_MASK) == 0) {
+    fprintf(stderr, "%s: error: when writing %s-bit through a %s-bit connection, endian is required.\n",
+                    program, width_str[format & EB_DATAX], width_str[line_width & EB_DATAX]);
+    return 1;
+  }
   
   stop = 0;
+  eb_cycle_close(cycle);
   eb_device_flush(device);
   while (!stop) {
     eb_socket_block(socket, -1);
@@ -103,12 +538,12 @@ int main(int argc, const char** argv) {
   }
   
   if ((status = eb_device_close(device)) != EB_OK) {
-    fprintf(stderr, "Failed to close Etherbone device: %s\n", eb_status(status));
+    fprintf(stderr, "%s: failed to close Etherbone device: %s\n", program, eb_status(status));
     return 1;
   }
   
   if ((status = eb_socket_close(socket)) != EB_OK) {
-    fprintf(stderr, "Failed to close Etherbone socket: %s\n", eb_status(status));
+    fprintf(stderr, "%s: failed to close Etherbone socket: %s\n", program, eb_status(status));
     return 1;
   }