diff --git a/etc/scdaq/scdaq.conf b/etc/scdaq/scdaq.conf
index 179898fb68a2e5b137a4dee79ee16b2c70eb30ab..1f7dfb653f45a44c8c404638b44803e076e244ad 100644
--- a/etc/scdaq/scdaq.conf
+++ b/etc/scdaq/scdaq.conf
@@ -8,9 +8,22 @@
 #   "wzdma"     for DMA driver from Wojciech M. Zabolotny
 #   "dma"       for XILINX DMA driver
 #   "filedma"   for reading from file and simulating DMA
-#
+#   "micronDMA" for PICO driver
+    
+
 input:wzdma
 
+
+# Settings for Micron board only
+  
+loadBitFile:yes
+
+    
+# ONLY for Micron Board/DMA, must be in tgz format, produced by pico utility package_flash
+     
+bitFileName:micron_bitfile_path
+
+     
 ## Settings for DMA input
 
 # DMA device
diff --git a/scripts/scdaqrpm.sh b/scripts/scdaqrpm.sh
index 147fd4b297b1b19f5c6e0b4526d008e969fe78d9..d91b92dace87ab48a003be3ee0bcbbbf4380f5e8 100755
--- a/scripts/scdaqrpm.sh
+++ b/scripts/scdaqrpm.sh
@@ -174,7 +174,7 @@ BuildArch: $BUILD_ARCH
 AutoReqProv: no
 Provides:/opt/scdaq
 
-Requires: tbb boost-thread libcurl
+Requires: tbb boost-thread libcurl libcrypto
 
 %description
 scouting daq 
diff --git a/src/Makefile b/src/Makefile
index 2b875b4e07484320edc6131b580d8aed23492bda..a6d803939038750f15115332c8fbfeb9b08938a4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -12,7 +12,7 @@
 TARGET = scdaq
 
 # source files
-SOURCES = config.cc DmaInputFilter.cc elastico.cc FileDmaInputFilter.cc InputFilter.cc output.cc processor.cc scdaq.cc session.cc slice.cc WZDmaInputFilter.cc
+SOURCES = config.cc DmaInputFilter.cc elastico.cc FileDmaInputFilter.cc InputFilter.cc output.cc processor.cc scdaq.cc session.cc slice.cc WZDmaInputFilter.cc MicronDmaInputFilter.cc
 C_SOURCES = wz_dma.c
 
 # work out names of object files from sources
@@ -27,7 +27,7 @@ CXXFLAGS = -std=c++11 -Wall -Wextra -O0 -g -rdynamic -Wconversion
 #CXXFLAGS = -std=c++11 -Wall -Wextra -g -rdynamic
 
 CFLAGS = $(CXXFLAGS)
-LDFLAGS = -ltbb -ltbbmalloc -lboost_thread -lboost_chrono -lcurl -pthread
+LDFLAGS = -Llibmicron -ltbb -ltbbmalloc -lboost_thread -lboost_chrono -lcurl -lpthread -lcrypto 
 
 CPPFLAGS = -I. -Iwzdma
 
@@ -43,7 +43,7 @@ clean:
 # dependencies separated by spaces (duplicates removed),
 # here ${OBJECTS}
 ${TARGET}: ${OBJECTS}
-	${LINK.cc} -o $@ $^
+	${LINK.cc} -o $@ $^ -lmicron
 
 # no rule is needed here for compilation as make already
 # knows how to do it
@@ -64,8 +64,9 @@ elastico.o:	elastico.h format.h slice.h controls.h log.h
 FileDmaInputFilter.o:	FileDmaInputFilter.h InputFilter.h log.h
 InputFilter.o:	InputFilter.h log.h
 output.o:	output.h slice.h log.h
-processor.o:	processor.h slice.h format.h log.h
+processor.o:	processor.h slice.h format.h log.h bril_histo.h
 session.o:	session.h log.h
 slice.o: 	slice.h
 WZDmaInputFilter.o:	WZDmaInputFilter.h InputFilter.h tools.h log.h
 wz_dma.o:	wz_dma.h
+MicronDmaInputFilter.o: MicronDmaInputFilter.h libmicron/micron_dma.h
diff --git a/src/MicronDmaInputFilter.cc b/src/MicronDmaInputFilter.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bc31b50b783f8a766db24eea0e2848f4cd3c24e4
--- /dev/null
+++ b/src/MicronDmaInputFilter.cc
@@ -0,0 +1,190 @@
+
+#include "MicronDmaInputFilter.h"
+
+#include <ctype.h>
+
+#include <boost/multiprecision/cpp_int.hpp>
+#include <cstring>
+#include <system_error>
+
+#include "format.h"
+#include "log.h"
+
+using uint256_t = boost::multiprecision::number<
+    boost::multiprecision::cpp_int_backend<256, 256, boost::multiprecision::unsigned_magnitude,
+                                           boost::multiprecision::unchecked, void>,
+    boost::multiprecision::et_off>;
+
+MicronDmaInputFilter::MicronDmaInputFilter(size_t packetBufferSize, size_t nbPacketBuffers,
+                                           ctrl &control, config &conf)
+    : InputFilter(packetBufferSize, nbPacketBuffers, control) {
+  std::string bitFileName = conf.getBitFileName();
+  bool loadBitFile = conf.getLoadBitFile();
+  int err;
+  processorType_ = conf.getProcessorType();
+  // The RunBitFile function will locate a Pico card that can run the given bit
+  // file, and is not already
+  //   opened in exclusive-access mode by another program. It requests exclusive
+  //   access to the Pico card so no other programs will try to reuse the card
+  //   and interfere with us.
+
+  if (loadBitFile) {
+    LOG(DEBUG) << "Loading FPGA with '" << bitFileName << "' ...";
+    err = micron_run_bit_file(bitFileName.c_str(), &pico_);
+  } else {
+    err = micron_find_pico(0x852, &pico_);
+  }
+
+  LOG(TRACE) << "Opening streams to and from the FPGA";
+  stream2_ = micron_create_stream(pico_, 2);
+
+  if (stream2_ < 0) {
+    // All functions in the Pico API return an error code.  If that code is < 0,
+    // then you should use the PicoErrors_FullError function to decode the error
+    // message.
+    // fprintf(stderr, "%s: CreateStream error: %s\n", "bitfile",
+    // PicoErrors_FullError(stream1_, 0, packetBufferSize));
+
+    throw std::runtime_error(bitFileName + ": CreateStream error: " +
+                             micron_picoerrors_fullerror(stream2_, 0, packetBufferSize));
+  }
+  LOG(TRACE) << "Created Micron DMA input filter";
+}
+
+MicronDmaInputFilter::~MicronDmaInputFilter() {
+  // streams are automatically closed when the PicoDrv object is destroyed, or
+  // on program termination, but
+  //  we can also close a stream manually.
+  micron_close_stream(pico_, stream2_);
+  LOG(TRACE) << "Destroyed Micron DMA input filter";
+}
+
+// add pad bytes to next multiple of 16 bytes
+int pad_for_16bytes(int len) {
+  int pad_len = len;
+  if (len % 16 != 0) {
+    pad_len = len + (16 - len % 16);
+  }
+  return pad_len;
+}
+
+// print <count> 256-bit numbers from buf
+void print256(FILE *f, void *buf, int count) {
+  uint32_t *u32p = (uint32_t *)buf;
+
+  for (int i = 0; i < count; ++i) {
+    fprintf(f, "0x%08x_%08x_%08x_%08x_%08x_%08x_%08x_%08x\n", u32p[8 * i + 7], u32p[8 * i + 6],
+            u32p[8 * i + 5], u32p[8 * i + 4], u32p[8 * i + 3], u32p[8 * i + 2], u32p[8 * i + 1],
+            u32p[8 * i]);
+  }
+}
+
+// print <count> 128-bit numbers from buf
+void print128(FILE *f, void *buf, int count) {
+  uint32_t *u32p = (uint32_t *)buf;
+
+  for (int i = 0; i < count; ++i)
+    fprintf(f, "0x%08x_%08x_%08x_%08x\n", u32p[4 * i + 3], u32p[4 * i + 2], u32p[4 * i + 1],
+            u32p[4 * i]);
+}
+
+ssize_t MicronDmaInputFilter::runMicronDAQ(char **buffer, size_t bufferSize) {
+  // Usually Pico streams come in two flavors: 4Byte wide, 16Byte wide
+  // (equivalent to 32bit, 128bit respectively) However, all calls to ReadStream
+  // and WriteStream must be 16Byte aligned (even for 4B wide streams) There is
+  // also an 'undocumented' 32Byte wide (256 bit) stream We are using that
+  // stream here (and in the firmware)
+  //
+  // Now allocate 32B aligned space for read and write stream buffers
+  //
+  // Similarily, the size of the call, in bytes, must also be a multiple of
+  // 16Bytes.
+  //
+  // err = posix_memalign((void**)&rbuf, 32, size);
+  //
+  // NOTE: Buffer are already preallocated and aligned to 32 bytes boundaries
+
+  // As with WriteStream, a ReadStream will block until all data is returned
+  // from the FPGA to the host.
+  //
+  // A user application will either have a deterministic amount of results, or a
+  // non-deterministic amount.
+  // - When a non-deterministic amount of results are expected, and given the
+  // blocking nature of the ReadStream,
+  //   a user should use the GetBytesAvailable() call to determine the amount of
+  //   data available for retrieval.
+  // - When a deterministic amount of results is expected, a user can skip the
+  // performance impacting
+  //   GetBytesAvailable() call and request the full amount of results. The user
+  //   could also call ReadStream() iteratively, without GetBytesAvailable(), in
+  //   which case getting smaller chunks of results may allow additional
+  //   processing of the data on the host while the FPGA generates more results.
+  //   Either approach works, and should be kept in mind when tuning performance
+  //   of your application.
+  //
+  //
+  // By calling GetBytesAvailable, one can see how much data is ready to read //
+  // not used for speed
+  // i = pico->GetBytesAvailable(stream1_, true /* reading */);
+  //	if (i < 0){
+  //	fprintf(stderr, "%s: GetBytesAvailable error: %s\n", "bitfile",
+  // PicoErrors_FullError(i, ibuf, sizeof(ibuf))); 	exit(-1);
+  //	}
+
+  // Here is where we actually call ReadStream
+  // This reads "size" number of bytes of data from the output stream specified
+  // by our stream handle (e.g. 'stream') into our host buffer (rbuf) This call
+  // will block until it is able to read the entire "size" Bytes of data.
+
+  uint32_t *u32p;
+  uint32_t packetSize;
+  int err;
+  bool bril = true;
+
+  if (processorType_ == StreamProcessor::ProcessorType::BRIL) {
+    u32p = (uint32_t *)((*buffer));
+    packetSize = 20 * 32 * 1791;  // bril packet size 16*3488 + 32*8;
+  } else {
+    packetSize = 32 * (*((uint32_t *)((*buffer) + 8)) + 2);
+  }
+
+  err = micron_read_stream(pico_, stream2_, *buffer, packetSize);
+
+  if (err < 0) {
+    std::cout << "err = " << err << std::endl;
+    throw std::runtime_error("ReadStream finished with error");
+  }
+
+  return (packetSize);
+}
+
+void MicronDmaInputFilter::rwRegisters() {
+  // err = micron_WriteDeviceAbsolute(pico_, 8, 0, 32);
+  // u32p = (uint32_t*) ((*buffer));
+  // err = micron_ReadDeviceAbsolute(pico_, 4, *buffer, 4);
+  // std::cout << "reading blocked_ro count " << std::hex << *u32p << std::endl;
+
+  /*if (err < 0) {
+          std::cout << "err = " << err << std::endl;
+          throw std::runtime_error( "ReadDeviceAbsolute finished with error" );
+  }
+*/
+}
+
+/**************************************************************************
+ * Entry points are here
+ * Overriding virtual functions
+ */
+
+// Print some additional info
+void MicronDmaInputFilter::print(std::ostream &out) const {
+  // out
+  // 	<< ", DMA errors " << stats.nbDmaErrors
+  // 	<< ", oversized " << stats.nbDmaOversizedPackets
+  // 	<< ", resets " << stats.nbBoardResets;
+}
+
+// Read a packet from DMA
+ssize_t MicronDmaInputFilter::readInput(char **buffer, size_t bufferSize) {
+  return runMicronDAQ(buffer, bufferSize);
+}
diff --git a/src/MicronDmaInputFilter.h b/src/MicronDmaInputFilter.h
new file mode 100644
index 0000000000000000000000000000000000000000..b81b18cdeca933380639771483e2d2eded36b37b
--- /dev/null
+++ b/src/MicronDmaInputFilter.h
@@ -0,0 +1,30 @@
+#ifndef MICRONDMA_INPUT_FILER_H
+#define MICRONDMA_INPUT_FILER_H
+
+#include <memory>
+
+#include "InputFilter.h"
+#include "config.h"
+#include "libmicron/micron_dma.h"
+#include "processor.h"
+
+class MicronDmaInputFilter : public InputFilter {
+ public:
+  static bool firstRead;
+  MicronDmaInputFilter(size_t, size_t, ctrl &, config &);
+  virtual ~MicronDmaInputFilter();
+
+ protected:
+  ssize_t readInput(char **buffer, size_t bufferSize);  // Override
+  void print(std::ostream &out) const;                  // Override
+
+ private:
+  micron_private *pico_;
+  int stream2_;
+  void rwRegisters();
+  ssize_t runMicronDAQ(char **buffer, size_t bufferSize);
+  StreamProcessor::ProcessorType processorType_;
+};
+typedef std::shared_ptr<MicronDmaInputFilter> MicronDmaInputFilterPtr;
+
+#endif  // MICRONDMA_INPUT_FILER_H
diff --git a/src/bril_histo.h b/src/bril_histo.h
new file mode 100644
index 0000000000000000000000000000000000000000..f6e770e0f1199bc2f1f582a59690991426b551f1
--- /dev/null
+++ b/src/bril_histo.h
@@ -0,0 +1,33 @@
+#ifndef BRIL_HISTO_H
+#define BRIL_HISTO_H
+
+#include <atomic>
+#include <fstream>
+#include <iostream>
+#include <mutex>
+#include <queue>
+#include <utility>
+#include <vector>
+
+#include "controls.h"
+#include "tbb/pipeline.h"
+
+template <typename T>
+class BrilHistoQueue {
+ public:
+  void push(const T &value) {
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_queque.push(value);
+  }
+
+  void pop() {
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_queque.pop();
+  }
+
+ private:
+  std::queue<T> m_queque;
+  mutable std::mutex m_mutex;
+};
+
+#endif
diff --git a/src/config.h b/src/config.h
index ebf7a05b59f66299ea0772815db0e9fff16d4a15..8b643c839c6e9c81070e9fa37d9f456db514b8bd 100644
--- a/src/config.h
+++ b/src/config.h
@@ -12,7 +12,7 @@
 
 class config {
  public:
-  enum class InputType { WZDMA, DMA, FILEDMA, FILE };
+  enum class InputType { WZDMA, DMA, FILEDMA, MICRONDMA, FILE };
 
   config(std::string filename);
 
@@ -29,6 +29,9 @@ class config {
     if (input == "filedma") {
       return InputType::FILEDMA;
     }
+    if (input == "micronDMA") {
+      return InputType::MICRONDMA;
+    }
     if (input == "file") {
       return InputType::FILE;
     }
@@ -58,6 +61,8 @@ class config {
     return boost::lexical_cast<uint32_t>(v.c_str());
   }
 
+  const std::string &getBitFileName() const { return vmap.at("bitFileName"); }
+
   StreamProcessor::ProcessorType getProcessorType() const {
     const std::string &input = vmap.at("processor_type");
     if (input == "PASS_THROUGH") {
@@ -69,6 +74,9 @@ class config {
     if (input == "CALO") {
       return StreamProcessor::ProcessorType::CALO;
     }
+    if (input == "BRIL") {
+      return StreamProcessor::ProcessorType::BRIL;
+    }
     throw std::invalid_argument("Configuration error: Wrong processor type '" + input + "'");
   }
 
@@ -81,6 +89,9 @@ class config {
   bool getOutputForceWrite() const {
     return (true ? vmap.at("output_force_write") == "yes" : false);
   }
+
+  bool getLoadBitFile() const { return (true ? vmap.at("loadBitFile") == "yes" : false); }
+
   uint32_t getNOrbitsPerDMAPacket() const {
     std::string v = vmap.at("NOrbitsPerDMAPacket");
     return boost::lexical_cast<uint32_t>(v.c_str());
diff --git a/src/controls.h b/src/controls.h
index 6581c62532d6cd0ba87cd5823763e8771db9852c..ad06fd8b7e91c5eabaf32f661b859c065117b223 100644
--- a/src/controls.h
+++ b/src/controls.h
@@ -3,6 +3,7 @@
 #include <stdint.h>
 
 #include <atomic>
+#include <string>
 
 struct ctrl {
   uint32_t run_number;
diff --git a/src/format.h b/src/format.h
index fc7819ea4577bbd9c9b14c3a40b6608669df6140..b8a5e761357cd5a0a2ec5d465dedbfc230f76210 100644
--- a/src/format.h
+++ b/src/format.h
@@ -29,6 +29,12 @@ struct blockMuon {
   uint32_t mu2s[8];
 };
 
+struct brilFrame {
+  uint32_t word;
+  uint32_t counter;
+  uint32_t emptyLinks[6];
+};
+
 struct bx_map_frame {
   uint32_t bx_map_l[8];
 };
@@ -239,6 +245,9 @@ struct constants {
   // dma_trailer_size
   static constexpr uint32_t intermediate = 0x00000001;
   static constexpr uint32_t final = 0x00000001;
+  static constexpr uint32_t bril_header = 4278781695;
+  static constexpr uint32_t NBXPerOrbit = 3564;
+  static constexpr uint32_t NFramesInHistoHeader = 9;
 };
 
 #endif
diff --git a/src/libmicron/libcrypto.so b/src/libmicron/libcrypto.so
new file mode 100755
index 0000000000000000000000000000000000000000..a9d9417293f9693bd1fd8d53681558c79e1ded99
Binary files /dev/null and b/src/libmicron/libcrypto.so differ
diff --git a/src/libmicron/libmicron.a b/src/libmicron/libmicron.a
new file mode 100644
index 0000000000000000000000000000000000000000..24efd18ab9bffe73f8a7a864c3990606c9622bcc
Binary files /dev/null and b/src/libmicron/libmicron.a differ
diff --git a/src/libmicron/makelib.sh b/src/libmicron/makelib.sh
new file mode 100755
index 0000000000000000000000000000000000000000..20045e81bbe17cb520d31c7493183fbafbf60db5
--- /dev/null
+++ b/src/libmicron/makelib.sh
@@ -0,0 +1,23 @@
+#
+# Simple workaround creating a Micron library
+#
+# exit when any command fails
+set -e
+
+rm -f *.o *.a
+
+echo "Compilling..."
+g++ -Wl,--copy-dt-needed-entries -std=c++11 -fPIC -Wall -Wextra -g -rdynamic -Wno-multichar -DLINUX -DPOSIX \
+-fmessage-length=0 -fdiagnostics-show-option -fms-extensions -Wno-write-strings -DOSNAME=“Linux” \
+-I. -I${PICOBASE}/software/include/linux -I${PICOBASE}/software/include \
+-c \
+${PICOBASE}/software/source/pico_drv_linux.cpp \
+${PICOBASE}/software/source/GString.cpp \
+${PICOBASE}/software/source/pico_drv.cpp \
+${PICOBASE}/software/source/pico_errors.cpp \
+${PICOBASE}/software/source/linux/linux.cpp \
+${PICOBASE}/software/source/pico_drv_swsim.cpp \
+micron_dma.c
+
+echo "Creating library..."
+ar rcs libmicron.a GString.o linux.o pico_drv.o pico_drv_linux.o pico_drv_swsim.o pico_errors.o micron_dma.o
diff --git a/src/libmicron/micron_dma.c b/src/libmicron/micron_dma.c
new file mode 100644
index 0000000000000000000000000000000000000000..e4535322db3350535acd10139e3a81a2c097e6bb
--- /dev/null
+++ b/src/libmicron/micron_dma.c
@@ -0,0 +1,52 @@
+/*
+ * Library encapsulating Micron functions
+ */
+
+#include <picodrv.h>
+#include <pico_errors.h>
+
+#include "micron_dma.h"
+
+int micron_run_bit_file(const char *bitFilePath, micron_private** micron)
+{
+	//PicoDrv **drvpp
+	return RunBitFile(bitFilePath, (PicoDrv**) micron);
+
+}
+
+int micron_find_pico(uint32_t model, micron_private** micron)
+{
+	return FindPico(model, (PicoDrv**) micron);
+}
+
+int micron_create_stream(micron_private* micron, int streamNum)
+{
+	return ((PicoDrv*) micron)->CreateStream(streamNum);
+}
+
+void micron_close_stream(micron_private* micron, int streamHandle)
+{
+	((PicoDrv*) micron)->CloseStream(streamHandle);
+}
+
+int micron_read_stream(micron_private* micron, int streamHandle, void *buf, int size)
+{
+	return ((PicoDrv*) micron)->ReadStream(streamHandle, buf, size);
+
+}
+
+int micron_ReadDeviceAbsolute (micron_private* micron, int addr, void* buf, int numBytes) 
+{
+	return ((PicoDrv*) micron)->ReadDeviceAbsolute(addr, buf, numBytes);
+
+}
+int micron_WriteDeviceAbsolute (micron_private* micron, int addr, void* buf, int numBytes) 
+{
+	return ((PicoDrv*) micron)->WriteDeviceAbsolute(addr, buf, numBytes);
+
+}
+
+const char *micron_picoerrors_fullerror(int erC, char *resultP, int resultSize)
+{
+	return PicoErrors_FullError(erC, resultP, resultSize);
+}
diff --git a/src/libmicron/micron_dma.h b/src/libmicron/micron_dma.h
new file mode 100644
index 0000000000000000000000000000000000000000..da6672c02fe52db9da6922a414877988616a175e
--- /dev/null
+++ b/src/libmicron/micron_dma.h
@@ -0,0 +1,15 @@
+#ifndef MICRON_DMA_H
+#define MICRON_DMA_H
+
+typedef void micron_private;
+
+int micron_run_bit_file(const char *bitFilePath, micron_private **micron);
+int micron_find_pico(uint32_t model, micron_private **micron);
+int micron_ReadDeviceAbsolute(micron_private *micron, int addr, void *buf, int numBytes);
+int micron_WriteDeviceAbsolute(micron_private *micron, int addr, void *buf, int numBytes);
+int micron_create_stream(micron_private *micron, int streamNum);
+void micron_close_stream(micron_private *micron, int streamHandle);
+int micron_read_stream(micron_private *micron, int streamHandle, void *buf, int size);
+const char *micron_picoerrors_fullerror(int erC, char *resultP, int resultSize);
+
+#endif  // MICRON_DMA_H
diff --git a/src/processor.cc b/src/processor.cc
index 846a760ab8d525387ffc8155aae20012aadbe08a..abb5f374d069e6781d499ef1dd7515a284a7bebf 100644
--- a/src/processor.cc
+++ b/src/processor.cc
@@ -24,6 +24,9 @@ StreamProcessor::StreamProcessor(size_t max_size_, bool doZS_, ProcessorType pro
   myfile.open("example.txt");
 }
 
+BrilHistoQueue<std::array<uint32_t, constants::NBXPerOrbit + constants::NFramesInHistoHeader>>
+    StreamProcessor::BrilQueue;
+
 // Loops over each word in the orbit trailer BX map and fills a vector with the
 // non-empty BX values
 void bit_check(std::vector<unsigned int> *bx_vect, uint32_t word, uint32_t offset) {
@@ -109,16 +112,16 @@ StreamProcessor::fillOrbitMetadata StreamProcessor::FillOrbitCalo(
       break;
     }  // orbit trailer has been reached, end of orbit data
     uint32_t bx = uint32_t{bx_vect[relbx]};
-    uint32_t orbit = uint32_t{orbit_header.first};
+    uint32_t orbit_ = uint32_t{orbit_header.first};
     if (bx > 3554) {
-      orbit--;
+      orbit_--;
     }  // fix for the fact that bx 3555 - 3564 are from the previous orbit
     uint32_t header = uint32_t{orbit_header.second};  // header can be added to later
     memcpy(wr_ptr, (char *)&header, 4);
     wr_ptr += 4;
     memcpy(wr_ptr, (char *)&bx, 4);
     wr_ptr += 4;
-    memcpy(wr_ptr, (char *)&orbit, 4);
+    memcpy(wr_ptr, (char *)&orbit_, 4);
     wr_ptr += 4;
     for (uint32_t i = 0; i < 8; i++) {
       memcpy(wr_ptr, (char *)&i, 4);
@@ -148,6 +151,57 @@ StreamProcessor::fillOrbitMetadata StreamProcessor::FillOrbitCalo(
   return meta;
 }
 
+uint32_t StreamProcessor::FillBril(char *rd_ptr, char *wr_ptr, char *end_ptr) {
+  static constexpr uint32_t NHistosPerPacket = 20;  // should not be changed, any more and SB852
+                                                    // crashes, would need to re-upload bitfile
+  uint32_t histo_i, histo_word_i = 0;
+  std::array<std::array<uint32_t, constants::NBXPerOrbit + constants::NFramesInHistoHeader>,
+             NHistosPerPacket>
+      histo_arr;
+  // uint32_t histo_arr[NHistosPerPacket][constants::NBXPerOrbit +
+  // NFramesInHistoHeader];
+
+  // BrilHistoQueue<std::array<uint32_t, constants::NBXPerOrbit +
+  // constants::NFramesInHistoHeader>> BrilQueue;
+
+  while ((rd_ptr != end_ptr) && (histo_i < NHistosPerPacket)) {
+    brilFrame *fr = reinterpret_cast<brilFrame *>(rd_ptr);
+    if (fr->word == constants::bril_header) {
+      rd_ptr += 32;
+      histo_i++;
+      histo_word_i = 0;
+      continue;
+    }
+
+    if (histo_i == 0) {
+      continue;
+    }  // Currently appears to be a bug where first histogram of packet is
+       // truncated, need to understand and fix, for now skip
+
+    if (histo_word_i < constants::NFramesInHistoHeader) {
+      histo_arr[histo_i][histo_word_i] = (fr->word >> 0) & 0xffffffff;
+    } else {
+      histo_arr[histo_i][(histo_word_i * 2) - constants::NFramesInHistoHeader] =
+          (fr->word >> 0) & 0xffff;
+      histo_arr[histo_i][(histo_word_i * 2) + 1 - constants::NFramesInHistoHeader] =
+          (fr->word >> 16) & 0xffff;
+    }
+    rd_ptr += sizeof(brilFrame);
+    histo_word_i++;
+  }
+  uint32_t packed_size = sizeof(uint32_t) * NHistosPerPacket *
+                         (constants::NBXPerOrbit + constants::NFramesInHistoHeader);
+  memcpy(wr_ptr, (char *)&histo_arr, packed_size);
+  wr_ptr += packed_size;
+
+  for (std::array<uint32_t, constants::NBXPerOrbit + constants::NFramesInHistoHeader> &hist :
+       histo_arr) {
+    BrilQueue.push(hist);
+  }
+
+  return packed_size;
+}
+
 // Goes through orbit worth of data and fills the output memory with the muons
 // corresponding to the non-empty bunchcrossings, as marked in bx_vect
 StreamProcessor::fillOrbitMetadata StreamProcessor::FillOrbitMuon(
@@ -267,6 +321,7 @@ void StreamProcessor::process(Slice &input, Slice &out) {
 
   char *rd_ptr = input.begin();
   char *wr_ptr = out.begin();
+  char *end_ptr = input.end();
   uint32_t counts = 0;
   bool endofpacket = false;
   uint32_t orbit_per_packet_count = 0;
@@ -280,6 +335,12 @@ void StreamProcessor::process(Slice &input, Slice &out) {
     out.set_counts(1);
     return;
   }
+  if (processorType == ProcessorType::BRIL) {
+    counts = FillBril(rd_ptr, wr_ptr, end_ptr);
+    out.set_end(out.begin() + counts);
+    out.set_counts(counts);
+    return;
+  }
 
   if (!CheckFrameMultBlock(input.size())) {
     return;
diff --git a/src/processor.h b/src/processor.h
index 96d57462f0af03ba851021dbabb9eef287545f01..4f3781bd591285868127db8325fa91bb2a5b79a8 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -7,16 +7,21 @@
 #include <utility>
 #include <vector>
 
+#include "bril_histo.h"
 #include "controls.h"
+#include "format.h"
 #include "tbb/pipeline.h"
 
-// reformatter
-
 class Slice;
 
 class StreamProcessor : public tbb::filter {
  public:
-  enum class ProcessorType { PASS_THROUGH, GMT, CALO };
+  static BrilHistoQueue<
+      std::array<uint32_t, constants::NBXPerOrbit + constants::NFramesInHistoHeader>>
+      BrilQueue;
+
+  enum class ProcessorType { PASS_THROUGH, GMT, CALO, BRIL };
+
   StreamProcessor(size_t max_size_, bool doZS_, ProcessorType processorType_,
                   uint32_t nOrbitsPerDMAPacket_, uint32_t prescaleFactor, ctrl &control);
   void *operator()(void *item) /*override*/;
@@ -33,6 +38,7 @@ class StreamProcessor : public tbb::filter {
   inline std::pair<uint32_t, bool> ProcessOrbitHeader(char *rd_ptr);
   fillOrbitMetadata FillOrbitMuon(std::vector<unsigned int> &bx_vect, char *rd_ptr, char *wr_ptr);
   fillOrbitMetadata FillOrbitCalo(std::vector<unsigned int> &bx_vect, char *rd_ptr, char *wr_ptr);
+  uint32_t FillBril(char *rd_ptr, char *wr_ptr, char *end_ptr);
   std::ofstream myfile;
   size_t max_size;
   uint64_t nbPackets;
diff --git a/src/qspi_flash_ctl/Makefile b/src/qspi_flash_ctl/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..c2bac064f2c13e5ffa0ddf77dfe59d83ebea6098
--- /dev/null
+++ b/src/qspi_flash_ctl/Makefile
@@ -0,0 +1,4 @@
+TARGET = flash_ctl
+USER_SOURCES = flash_ctl.cpp i2c.cpp
+
+include $(PICOBASE)/software/Makefile.common
diff --git a/src/qspi_flash_ctl/flash_ctl b/src/qspi_flash_ctl/flash_ctl
new file mode 100755
index 0000000000000000000000000000000000000000..ac704e8843be446a77be7ffaffb65ff0fc283512
Binary files /dev/null and b/src/qspi_flash_ctl/flash_ctl differ
diff --git a/src/qspi_flash_ctl/flash_ctl.cpp b/src/qspi_flash_ctl/flash_ctl.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5f577fa7126dc6da08cbd0d795df9413815c42ba
--- /dev/null
+++ b/src/qspi_flash_ctl/flash_ctl.cpp
@@ -0,0 +1,1257 @@
+/*===================================================================================
+ File: flash_ctl.cpp.
+
+ This module is a simple utility designed to illustrate the operation of the QSPI
+ flash controller.  A stream connects to the QSPI flash controller and the software
+ has commands for erasing, programming, verifying and reading the flash, and reading
+ the ID register of the flash.
+
+ This program interfaces with the firmware module flash_ctl.v in the firmware directory.
+
+=====================================================================================*/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <getopt.h>
+#include <picodrv.h>
+#include <pico_errors.h>
+#include </usr/include/boost/format.hpp>
+#include "i2c.h"
+
+// Globals
+PicoDrv     *pico;
+int         streamhandle;
+uint32_t    *readbuf, *writebuf;
+int         verbose = 0;
+
+// Routine to read the controller busy output pin until it is deasserted
+void check_qspi_flash_ctl_busy() {
+    int err;
+    char        ibuf[1024];
+
+    writebuf[0] = 0x8;          // Data length in bytes of stream command
+    writebuf[1] = 0x1000;       // Status register Address
+    writebuf[2] = 0x1;          // Read cmd, bit[64]=1
+    writebuf[3] = 0x0;          // unused
+    if (verbose) {
+        printf("Checking busy");
+    } 
+
+    do {
+        err = pico->WriteStream(streamhandle, &writebuf[0], 16);
+        if (err < 0) {
+            fprintf(stderr, "flash_ctl_wr: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+            exit(-1);
+        }
+        err = pico->ReadStream(streamhandle, readbuf, 16);
+        if (err < 0) {
+            fprintf(stderr, "flash_ctl_wr: ReadStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+            exit(-1);
+        }
+        if (verbose) {
+            printf("%s", (readbuf[0] & 0x01) ? "." : "\n");
+        }
+    } while ((readbuf[0] & 0x01) == 0x01); // loop until qspi_flash_ctl busy bit is low
+}
+
+// Routine to write 64-bits of data to a PicoBus register
+void picobus_wr(uint32_t addr, uint64_t data) {
+    int err;
+    char        ibuf[1024];
+
+    // Send qspi flash controller write command to stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = addr;                               // 32-bit Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    writebuf[4] = (uint32_t)  data & 0xFFFFFFFF;      // low 32-bits of 64-bit data 
+    writebuf[5] = (uint32_t) (data>>32) & 0xFFFFFFFF; // high 32-bits of 64-bit data
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Sending %08X%08X to   %03X \n", (uint32_t)(data>>32) & 0xFFFFFFFF, (uint32_t) data & 0xFFFFFFFF, addr);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "picobus_wr: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+}
+
+// Routine to read 64-bits of data from a PicoBus register 
+uint64_t picobus_rd(uint32_t addr) {
+    int err;
+    char        ibuf[1024];
+    uint64_t    readdata;
+
+    // clear our read buffer to prepare for the read.
+    memset(readbuf, 0, sizeof(readbuf));
+
+    // Send qspi flash controller read command to stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of stream command
+    writebuf[1] = addr;                               // 32-bit Picobus Address
+    writebuf[2] = 0x1;                                // Read cmd, bit[64] = 1
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    err = pico->WriteStream(streamhandle, &writebuf[0], 16);
+    if (err < 0) {
+        fprintf(stderr, "picobus_rd: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Read the response data from the stream interface.
+    err = pico->ReadStream(streamhandle, readbuf, 16);
+    if (err < 0) {
+        fprintf(stderr, "picobus_rd: ReadStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    readdata = ( (uint64_t) readbuf[1]<<32) | readbuf[0];
+    return(readdata);
+}
+
+// Routine to write 64-bits of data to a qspi flash controller register
+void flash_ctl_wr(uint32_t addr, uint64_t data) {
+    int err;
+    char        ibuf[1024];
+
+    check_qspi_flash_ctl_busy();
+
+    // Send qspi flash controller write command to stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = addr & 0xFF8;                       // 12-bit Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    writebuf[4] = (uint32_t)  data & 0xFFFFFFFF;      // low 32-bits of 64-bit data 
+    writebuf[5] = (uint32_t) (data>>32) & 0xFFFFFFFF; // high 32-bits of 64-bit data
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Sending %08X%08X to   %03X \n", (uint32_t)(data>>32) & 0xFFFFFFFF, (uint32_t) data & 0xFFFFFFFF, addr & 0xFF8);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "flash_ctl_wr: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+}
+
+// Routine to read 64-bits of data from a qspi flash controller register and check
+// if the received data equals the expected data
+void flash_ctl_rd(uint32_t addr, uint64_t exp_data, int check_data) {
+    int err;
+    char        ibuf[1024];
+
+    check_qspi_flash_ctl_busy();
+
+    // clear our read buffer to prepare for the read.
+    memset(readbuf, 0, sizeof(readbuf));
+
+    // Send qspi flash controller read command to stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of stream command
+    writebuf[1] = addr & 0xFF8;                       // 12-bit Address
+    writebuf[2] = 0x1;                                // Read cmd, bit[64] = 1
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    err = pico->WriteStream(streamhandle, &writebuf[0], 16);
+    if (err < 0) {
+        fprintf(stderr, "flash_ctl_rd: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Read the response data from the stream interface.
+    err = pico->ReadStream(streamhandle, readbuf, 16);
+    if (err < 0) {
+        fprintf(stderr, "flash_ctl_rd: ReadStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    if (check_data) {
+        if (((uint32_t) (exp_data & 0xFFFFFFFF) == readbuf[0]) && ((uint32_t) ((exp_data>>32) & 0xFFFFFFFF) == readbuf[1])) {
+            if (verbose) {
+                printf("Reading %08X%08X from %03X ", readbuf[1], readbuf[0], addr & 0xFF8);
+                printf(" MATCH\n");
+            } 
+        } else { 
+            printf("Reading %08X%08X from %03X ", readbuf[1], readbuf[0], addr & 0xFF8);
+            printf(" MISMATCH, EXPECTED %08X%08X\n", (uint32_t) ((exp_data>>32) & 0xFFFFFFFF), (uint32_t) (exp_data & 0xFFFFFFFF));
+        }
+    }
+}
+
+void check_read_flag_status_reg_ready () {
+    int i;
+
+    if (verbose) printf("checking flash flag status register ready bit\n");
+
+    i=0;
+    do { flash_ctl_wr(0x000, (uint64_t) 0x0000000100000070);      // READ_FLAG_STATUS_CMD 
+         flash_ctl_rd(0x018, (uint64_t) 0x0000000000000081, 0);   // expected data 81h
+         if ((i & 0x00003FFF) == 0x00003FFF) printf(".");         // User output for each 16K times through the loop
+         fflush(stdout);                                          // Cause the buffered output to print each period
+         i=i+1;
+    } while ((readbuf[0] & 0x80) != 0x80);                        // wait for bit[7] to go high, ready
+
+    if (verbose) printf("Flash flag status register = %02X ready bit is high\n", readbuf[0]);
+}
+
+void soft_reset() {
+    int err;
+    char        ibuf[1024];
+
+    // Assert soft reset to the qspi flash controller
+    writebuf[0] = 0x0; 
+    writebuf[1] = 0x0; 
+    writebuf[2] = 0x03;   // assert soft reset 
+    writebuf[3] = 0x0; 
+
+    if (verbose) printf("Soft Reset = 1 Writing Stream %d with %d Bytes\n", streamhandle, 16);
+    err = pico->WriteStream(streamhandle, writebuf, 16);
+    if (err < 0) {
+        fprintf(stderr, "soft_reset: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    usleep(10000); // 10 ms
+
+    writebuf[0] = 0x0; 
+    writebuf[1] = 0x0; 
+    writebuf[2] = 0x02;   // deassert soft reset 
+    writebuf[3] = 0x0; 
+
+    if (verbose) printf("Soft Reset = 0 Writing Stream %d with %d Bytes\n", streamhandle, 16);
+    err = pico->WriteStream(streamhandle, writebuf, 16);
+    if (err < 0) {
+        fprintf(stderr, "soft_reset: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Let the fifos come out of reset before sending the next command
+    usleep(10000); // 10 ms
+}
+
+// Program the flash volatile configuration register, enhanced volatile configuration register
+// and set the controller into quad spi mode
+void init_flash() {
+
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x020, (uint64_t) 0x00000000000000AB); // wr_data, 10 dummy clock cycles, disable XIP, continous addresses no wrap
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000100000081); // wr_cmd, write volatile config reg
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x020, (uint64_t) 0x000000000000006F); // wr_data, Quad I/O protocol enabled, DTR disabled, disable hold on DQ3, 30 ohm drive DAA - was 7F
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000100000061); // wr cmd, write enhanced volatile config reg
+    flash_ctl_wr(0x010, (uint64_t) 0x0000000000000100); // set qspi_mode=1, puts the controller in qspi mode
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x000, (uint64_t) 0x00000000000000B7); // ENTER 4BYTE Address Mode
+}
+
+// Reset the flash volatile configuration register, enhanced volatile configuration register
+// and extended address register to the power-on reset default condition.
+// This is necessary for Xilinx FPGA's to be able to successfully configure from flash
+void reset_flash() {
+
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000066); // Reset Enable
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000099); // Reset Memory
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x000, (uint64_t) 0x00000000000000E9); // Exit 4BYTE Address Mode
+    flash_ctl_wr(0x010, (uint64_t) 0x0000000000000000); // set qspi_mode=0, takes the controller out of qspi mode
+}
+
+void read_id_reg() {
+
+    flash_ctl_wr(0x000, (uint64_t) 0x00000003000000AF); // issue rd_id cmd - 3 bytes
+    flash_ctl_rd(0x018, (uint64_t) 0x000000000020BA20, 0); // rd_data out - MFG ID - 20h micron, MEM_TYPE - BAh(3V),BBh (1.8V), MEM_CAP - 21h(1Gb),20h(512Mb)
+
+    printf("\nQSPI Flash READID Register = %08X \n", readbuf[0]);
+    printf("Key: MFG ID - 20h micron, MEM_TYPE - BAh(3V), BBh(1.8V), MEM_CAP - 22h(2Gb), 21h(1Gb), 20h(512Mb), 19h(256Mb) \n");
+}
+
+// Reverse the bytes of an 8-byte word swapping bytes 7,6,5,4,3,2,1,0 to 0,1,2,3,4,5,6,7
+// This is for the mgmt_qspi_flash_ctl.v module that sends out byte 7 first and byte 0 last
+// but the flash expects byte 0 first and byte 7 last.
+void byte_swap(unsigned char* ptr) {       
+    unsigned char  tmp;
+
+    for (int i=0; i<4; i++) {
+        tmp        = *(ptr+i);
+        *(ptr+i)   = *(ptr+7-i);
+        *(ptr+7-i) = tmp;
+    }
+}
+
+// Read and print 256-bytes of flash data
+void read_flash(uint32_t size, uint32_t address) {
+
+    printf("\nReading Flash Contents: %xh Bytes from %08Xh\n", size, address);
+    printf("Output is compatible with:  od -t x4 -A x -v bitstream.bin \n");
+
+    for (int j=0; j<size; j=j+8) {
+        flash_ctl_wr(0x008, (uint64_t) address + j);           // spi_addr for 8 bytes of data
+        flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec);    // issue the quad fast read cmd - 0xec, with 10 dummy clocks
+        flash_ctl_rd(0x018, (uint64_t) 0x0000000000000000, 0); // rd the data_out register to get the 8 bytes of data
+        byte_swap((unsigned char*) &readbuf[0]);               // Swap order of 8-bytes for the mgmt_qspi_flash_ctl.v 
+        if ((j & 0x008) == 0) {
+            printf("\n%06x ", j);
+            printf("%08x %08x ", readbuf[0], readbuf[1]);
+        } else {
+            printf("%08x %08x", readbuf[0], readbuf[1]);
+        }
+    }
+    printf("\n");
+}
+
+// Erase entire flash contents 
+// Issue die erase command C4 for the 1Gbit flash on the SB-852, each die is 512Mbit or 64MB
+// Issue die erase command C4 for the 2Gbit flash on the SB-851, each die is 512Mbit or 64MB
+// Issue bulk erase command C7 for the 256Mbit flash on the AC-510
+// Issue bulk erase command C7 for the 512Mbit flash on the AC-511
+void erase_flash() {
+    int flash_size = 0;
+
+    read_id_reg();                     // get the flash device size
+    flash_size = readbuf[0] & 0xFF;  // 22h(2Gb), 21h(1Gb), 20h(512Mb), 19h(256Mb) 
+
+    switch (flash_size) {
+        case 0x19:
+                   printf("\nErasing Flash contents - Flash Size is 256 Mbit\n");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C7); // Bulk Erase, NQ25_256 requires C7 
+                   check_read_flag_status_reg_ready ();  
+                   break;
+        case 0x20:
+                   printf("\nErasing Flash contents - Flash Size is 512 Mbit\n");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C7); // Bulk Erase, NQ25_512 requires C7 
+                   check_read_flag_status_reg_ready ();  
+                   break;
+        case 0x21:
+                   printf("\nErasing Flash contents - Flash Size is 1 Gbit\n");
+                   printf("Erasing Die 1 ");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000 for die 1, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_1G requires C4 
+                   check_read_flag_status_reg_ready ();
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000004000000); // Fill out the SPI_ADDR - 0x04000000 for die 2, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_1G requires C4 
+                   printf("\nErasing Die 2 ");
+                   check_read_flag_status_reg_ready ();  
+                   break;
+        case 0x22:
+                   printf("\nErasing Flash contents - Flash Size is 2 Gbit\n");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000 for die 1, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_1G requires C4 
+                   printf("Erasing Die 1 ");
+                   check_read_flag_status_reg_ready ();
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000004000000); // Fill out the SPI_ADDR - 0x04000000 for die 2, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_1G requires C4 
+                   printf("\nErasing Die 2 ");
+                   check_read_flag_status_reg_ready ();
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x0000000008000000); // Fill out the SPI_ADDR - 0x08000000 for die 3, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_1G requires C4 
+                   printf("\nErasing Die 3 ");
+                   check_read_flag_status_reg_ready ();
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                   flash_ctl_wr(0x008, (uint64_t) 0x000000000C000000); // Fill out the SPI_ADDR - 0x0C000000 for die 4, 64MB
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4); // Die Erase, MT25_2G requires C4 
+                   printf("\nErasing Die 4 ");
+                   check_read_flag_status_reg_ready ();  
+                   break;
+        default:
+                   printf("Flash Size is unknown, exiting");
+                   break;
+    }
+    printf("\n");
+}
+
+void erase_half_flash(int half) {
+    int      flash_size = 0;
+    uint32_t addr; 
+
+    read_id_reg();                   // get the flash device size
+    flash_size = readbuf[0] & 0xFF;  // 22h(2Gb), 21h(1Gb), 20h(512Mb), 19h(256Mb) 
+
+    switch (flash_size) {
+        case 0x19:
+                   printf("\nErasing %s half of Flash contents - Flash Size is 256 Mbit\n", half ? "High" : "Low");
+                   if (half == 1) {
+                       addr = 0x01000000;             // 16MB is half way through 256 Mbit or 32 MB flash
+                   } else {
+                       addr = 0x0;                    // 16MB is half way through 256 Mbit or 32 MB flash
+                   }
+                   printf("\nErasing Sectors ");
+                   for (int i=addr; i < (addr+0x01000000); i+= 0x10000) {  // erase 64KB per iteration
+                       flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                       flash_ctl_wr(0x008, (uint64_t) i);                  // Fill out the SPI_ADDR of 64KB sector 
+                       flash_ctl_wr(0x000, (uint64_t) 0x00000000000000D8); // Sector Erase  
+                       printf(".");
+                       check_read_flag_status_reg_ready ();
+                   }
+                   break;
+        case 0x20:
+                   printf("\nErasing %s half of Flash contents - Flash Size is 512 Mbit\n", half ? "High" : "Low");
+                   if (half == 1) {
+                       addr = 0x02000000;             // 32MB is half way through 512 Mbit or 64 MB flash
+                   } else {
+                       addr = 0x0;                    // 32MB is half way through 512 Mbit or 64 MB flash
+                   }
+                   printf("\nErasing Sectors ");
+                   for (int i=addr; i < (addr+0x02000000); i+= 0x10000) {  // erase 64KB per iteration
+                       flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+                       flash_ctl_wr(0x008, (uint64_t) i);                  // Fill out the SPI_ADDR of 64KB sector 
+                       flash_ctl_wr(0x000, (uint64_t) 0x00000000000000D8); // Sector Erase  
+                       printf(".");
+                       check_read_flag_status_reg_ready ();
+                   }
+                   break;
+        case 0x21:
+                   printf("\nErasing %s half of Flash contents - Flash Size is 1 Gbit\n", half ? "High" : "Low");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006);     // WEL
+                   if (half == 1) {
+                       flash_ctl_wr(0x008, (uint64_t) 0x0000000004000000); // Fill out the SPI_ADDR - 0x04000000 for die 2, 64MB
+                       printf("Erasing Die 2 ");
+                   } else {
+                       flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000 for die 1, 64MB
+                       printf("Erasing Die 1 ");
+                   }
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4);     // Die Erase, MT25_1G requires C4 
+                   check_read_flag_status_reg_ready ();
+                   break;
+        case 0x22:
+                   printf("\nErasing %s half of Flash contents - Flash Size is 2 Gbit\n", half ? "High" : "Low");
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006);     // WEL
+                   if (half == 1) {
+                       flash_ctl_wr(0x008, (uint64_t) 0x0000000008000000); // Fill out the SPI_ADDR - 0x04000000 for die 3, 64MB
+                       printf("Erasing Die 3 ");
+                   } else {
+                       flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - 0x00000000 for die 1, 64MB
+                       printf("Erasing Die 1 ");
+                   }
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4);     // Die Erase, MT25_2G requires C4 
+                   check_read_flag_status_reg_ready ();
+                   flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006);     // WEL
+                   if (half == 1) {
+                       flash_ctl_wr(0x008, (uint64_t) 0x000000000C000000); // Fill out the SPI_ADDR - 0x04000000 for die 4, 64MB
+                       printf("\nErasing Die 4 ");
+                   } else {
+                       flash_ctl_wr(0x008, (uint64_t) 0x0000000004000000); // Fill out the SPI_ADDR - 0x00000000 for die 2, 64MB
+                       printf("\nErasing Die 2 ");
+                   }
+                   flash_ctl_wr(0x000, (uint64_t) 0x00000000000000C4);     // Die Erase, MT25_2G requires C4 
+                   check_read_flag_status_reg_ready ();
+                   break;
+        default:
+                   printf("Flash Size is unknown, exiting");
+                   break;
+    }
+    printf("\n");
+}
+
+// Program flash device with contents of .bin file using 256-byte writes 
+// The flash must be previously erased for the program to work correctly
+void program_flash(char* progbitfile, uint32_t address, uint32_t model) {
+
+    FILE           *file;
+    int            size=0;
+    int            bitstream_pad;
+    unsigned char* data = NULL;
+    uint64_t*      data_ptr;
+    int            err;
+    uint64_t       program_data;
+ 
+    file = fopen(progbitfile, "rb");
+    if (file == 0) {
+        printf("Couldn't open file %s \n", progbitfile);
+        exit(-1);
+    }
+
+    printf("\nProgramming Flash device with '%s' \n", progbitfile);
+
+    fseek(file, 0, SEEK_END);
+    size = ftell(file);
+    fseek(file, 0, SEEK_SET);
+
+    bitstream_pad = 256 - (size % 256);
+
+    printf("Bitstream file size = %d, padding %d bytes \n", size, bitstream_pad);
+
+    data = (unsigned char *) malloc(size + bitstream_pad);  // allocate memory for the bitstream file, rounded up to the next 256-byte boundary
+    memset(data+size, 0xff, bitstream_pad);                 // Set the pad bytes to all 1s
+
+    // Read the bitstream file
+    if ((err = fread(data, 1, size, file)) != size) {
+        free(data);
+        printf("fread error\n");
+        exit(-1);
+    }
+    fclose(file);
+
+    data_ptr = (uint64_t*) data;
+
+    printf("Programming %08Xh bytes at address %08Xh\n", size+bitstream_pad, address);
+
+    for (int i=0; i<(size + bitstream_pad); i+=256) {       // program 256 bytes per loop
+        if ((i & 0x3FF00) == 0x3FF00) printf(".");          // User output for each 256K bytes programmed
+        fflush(stdout);                                     // Cause the buffered output to print each period
+
+        flash_ctl_wr(0x008, (uint64_t) (address + i));      // Set the ADDR to program 
+        for (int j=0; j<32; j++) {
+            program_data = *(data_ptr+i/8+j);               // 64-bit Data to program to the flash
+            byte_swap((unsigned char*) &program_data);      // Swap order of the 8-bytes to program for the mgmt_qspi_flash_ctl.v 
+            flash_ctl_wr(0x020, (uint64_t) program_data);   // Fill the WR_DATA, 8 bytes per write, 256 bytes per flash program command
+        }
+        check_read_flag_status_reg_ready ();                // Wait for flash busy to deassert from last program command before issuing the next one
+        flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+        flash_ctl_wr(0x000, (uint64_t) 0x0000010000000032); // issue QUAD_FAST_PGM, specify 256 bytes of WR_DATA   
+    }
+    check_read_flag_status_reg_ready ();                    // Wait for flash busy to deassert after the last program command
+    printf("\nFlash programming done \n");
+}
+
+// Verify flash device contents begninning at address against verifybitfile
+void verify_flash(char* verifybitfile, uint32_t address) {
+
+    FILE           *file;
+    int            size=0;
+    int            bitstream_pad;
+    unsigned char* data = NULL;
+    uint64_t*      data_ptr;
+    int            err;
+ 
+    file = fopen(verifybitfile, "rb");
+    if (file == 0) {
+        printf("Couldn't open file %s \n", verifybitfile);
+        exit(-1);
+    }
+
+    printf("\nVerify Flash contents beginning at %08Xh against '%s' \n", address, verifybitfile);
+
+    fseek(file, 0, SEEK_END);
+    size = ftell(file);
+    fseek(file, 0, SEEK_SET);
+
+    bitstream_pad = 256 - (size % 256);
+
+    printf("Bitstream file size = %d, padding %d bytes \n", size, bitstream_pad);
+
+    data = (unsigned char *) malloc(size + bitstream_pad);  // allocate memory for the bitstream file, rounded up to the next 256-byte boundary
+    memset(data+size, 0xff, bitstream_pad);                 // Set the pad bytes to all 1s
+
+    // Read the bitstream file
+    if ((err = fread(data, 1, size, file)) != size) {
+        free(data);
+        printf("fread error\n");
+        exit(-1);
+    }
+    fclose(file);
+
+    data_ptr = (uint64_t*) data;
+
+    for (int i=0; i<(size + bitstream_pad); i+=8) {            // Read and verify 8-bytes per loop
+        if ((i & 0x1FFF8) == 0x1FFF8) printf(".");             // User output for each 128K iterations
+        fflush(stdout);                                        // Cause the buffered output to print each period
+
+        flash_ctl_wr(0x008, (uint64_t) (address + i));         // spi_addr to read for 8 bytes of data
+        flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec);    // issue the quad fast read cmd - 0xec, with 10 dummy clocks
+        flash_ctl_rd(0x018, (uint64_t) 0x0000000000000000, 0); // rd the data_out register to get the 8 bytes of data
+        byte_swap((unsigned char*) &readbuf[0]);               // Swap order of 8-bytes for the mgmt_qspi_flash_ctl.v 
+ 
+        if (*data_ptr++ != *((uint64_t *) readbuf)) {
+            printf("\nVerify FAILURE at flash address %08X, Expected %016lX Found %016lX \n", address+i, *--data_ptr, *((uint64_t *) readbuf));
+            exit(-1);
+        }
+
+    }
+    printf("\nFlash Verify done, flash contents match %s \n", verifybitfile);
+}
+
+// 64K Sector erase, 256-byte write at 0000h, 256-byte page write at 8000h, read back and verify
+// Note this test does not do a byte swap on the data so the mgmt_qspi_flash_ctl.v actually 
+// reverses the byte order of each 8-byte word but it is just data and if what is written is what
+// is read then it passes.
+void quad_256byte_write_read_test () {
+    printf("\nRunning 4BYTE_QUAD_256BYTE_WRBUFFER_WR_RD_multiple Test \n");
+
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Fill out the SPI_ADDR - sector 0- 0x00000000
+    flash_ctl_wr(0x000, (uint64_t) 0x00000000000000D8); // Sector Erase  DAA - was DCh, not recognized on NQ25_256
+    check_read_flag_status_reg_ready ();
+
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x020, (uint64_t) 0x123456789ABCDEF6); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x2222222222222222); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x3333333333333333); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x4444444444444444); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x5555555555555555); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x6666666666666666); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x7777777777777777); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x8888888888888888); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x9999999999999999); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xAAAAAAAAAAAAAAAA); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xBBBBBBBBBBBBBBBB); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xCCCCCCCCCCCCCCCC); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xDDDDDDDDDDDDDDDD); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xEEEEEEEEEEEEEEEE); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xFFFFFFFFFFFFFFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x0000000000000000); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234111111111111); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234222222222222); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234333333333333); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234444444444444); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234555555555555); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234666666666666); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234777777777777); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234888888888888); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234999999999999); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234AAAAAAAAAAAA); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234BBBBBBBBBBBB); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234CCCCCCCCCCCC); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234DDDDDDDDDDDD); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234EEEEEEEEEEEE); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x1234FFFFFFFFFFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xAAAAAAAAAFFFFFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // Start at ADDR 0x00000000 
+    flash_ctl_wr(0x000, (uint64_t) 0x0000010000000032); // issue QUAD_FAST_PGM, & specify 256 bytes of WR_DATA   DAA 3Eh does not work on N25Q256
+
+    check_read_flag_status_reg_ready ();
+
+    flash_ctl_wr(0x000, (uint64_t) 0x0000000000000006); // WEL
+    flash_ctl_wr(0x020, (uint64_t) 0x1111111112341111); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x2222222212342222); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x3333333312343333); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x4444444412344444); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x5555555512345555); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x6666666612346666); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x7777777712347777); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x8888888812348888); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x9999999912349999); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xAAAAAAAA1234AAAA); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xBBBBBBBB1234BBBB); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xCCCCCCCC1234CCCC); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xDDDDDDDD1234DDDD); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xEEEEEEEE1234EEEE); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xFFFFFFFF1234FFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xAAAAAAAAAFFFFFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x223456789ABCDEF6); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x5555555511111111); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xaaaa111155559999); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x8888bbbb22227777); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x999999993333dddd); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x6666666666666666); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x5555555522222222); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x8888888888888888); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x2222221111111199); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xAA444444443333AA); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xBBBB444444444BBB); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xCC666666666666CC); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xDDDDD33333D3333D); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xEE333333333222EE); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0xFFFFFFFFFFFFFFFF); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x020, (uint64_t) 0x0555502222220400); // Fill the WR_DATA 0-7bytes 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008000); // Start at ADDR 0x00008000 
+    flash_ctl_wr(0x000, (uint64_t) 0x0000010000000032); // issue QUAD_FAST_PGM, & specify 256 bytes of WR_DATA   
+
+    check_read_flag_status_reg_ready ();
+
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000000); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec, 10 dummy clocks
+    flash_ctl_rd(0x018, (uint64_t) 0x123456789abcdef6, 1); // rd the data_out register to get first 8 bytes
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000008); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x2222222222222222, 1);
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000010); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x3333333333333333, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000018); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x4444444444444444, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000020); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x5555555555555555, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000028); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x6666666666666666, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000030); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x7777777777777777, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000038); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x8888888888888888, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000040); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x9999999999999999, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000048); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xaaaaaaaaaaaaaaaa, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000050); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xbbbbbbbbbbbbbbbb, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000058); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xcccccccccccccccc, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000060); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xdddddddddddddddd, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000068); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xeeeeeeeeeeeeeeee, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000070); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xffffffffffffffff, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000078); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x0000000000000000, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000080); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234111111111111, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000088); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234222222222222, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000090); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234333333333333, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000000098); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234444444444444, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000a0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234555555555555, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000a8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234666666666666, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000b0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234777777777777, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000b8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234888888888888, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000c0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234999999999999, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000c8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234aaaaaaaaaaaa, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000d0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234bbbbbbbbbbbb, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000d8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234cccccccccccc, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000e0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234dddddddddddd, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000e8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234eeeeeeeeeeee, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000f0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1234ffffffffffff, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000000f8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xaaaaaaaaafffffff, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008000); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x1111111112341111, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008008); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x2222222212342222, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008010); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x3333333312343333, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008018); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x4444444412344444, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008020); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x5555555512345555, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008028); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x6666666612346666, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008030); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x7777777712347777, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008038); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x8888888812348888, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008040); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x9999999912349999, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008048); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xAAAAAAAA1234AAAA, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008050); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xBBBBBBBB1234BBBB, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008058); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xCCCCCCCC1234CCCC, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008060); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xDDDDDDDD1234DDDD, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008068); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xEEEEEEEE1234EEEE, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008070); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xFFFFFFFF1234FFFF, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008078); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xAAAAAAAAAFFFFFFF, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008080); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x223456789ABCDEF6, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008088); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x5555555511111111, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008090); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xaaaa111155559999, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x0000000000008098); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x8888bbbb22227777, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080a0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x999999993333dddd, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080a8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x6666666666666666, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080b0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x5555555522222222, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080b8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x8888888888888888, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080c0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x2222221111111199, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080c8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xAA444444443333AA, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080d0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xBBBB444444444BBB, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080d8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xCC666666666666CC, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080e0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xDDDDD33333D3333D, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080e8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xEE333333333222EE, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080f0); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0xFFFFFFFFFFFFFFFF, 1); 
+    flash_ctl_wr(0x008, (uint64_t) 0x00000000000080f8); // fill out the spi_addr for first 8 bytes
+    flash_ctl_wr(0x000, (uint64_t) 0x000000080000a0ec); // issue the quad fast read cmd - 0xec
+    flash_ctl_rd(0x018, (uint64_t) 0x0555502222220400, 1); 
+}
+
+/*
+ * prints out the correct usage for this application
+ */
+void PrintUsage(){
+    printf("Syntax: ./flash_ctl [--b <bitfile> | --m <module>]  [optional args]\n");
+    printf("    --b               <bitfile>     AC-510, AC-511 only, Loads the FPGA with the bitfile containing the flash_ctl firmware\n");
+    printf("    --device_id       <device id>   /dev/pico*, program specific pico device \n");
+    printf("    --m               <module>      Micron module to use for this run (exclusive with --b option)\n");
+    printf("                                    Supported modules: 0x510, 0x511, 0x851, 0x852, 0xEA5\n");
+    printf("    --readid                        Reads the Flash ID register \n");
+    printf("    --write_read_test               64K Sector Erase, program, read back and verify two 256-byte pages\n");
+    printf("    --read            <address>     Read and print --size bytes from flash beginning at address, rounded to 256-byte boundary\n");
+    printf("    --size            <bytes>       Number of bytes used for the --read command, default 256-bytes\n");
+    printf("    --erase                         Erase the entire flash device\n");
+    printf("    --erase_half        0 | 1       Erase the low half(0) or high half(1) of the flash device\n");
+    printf("    --program         <binfile>     Program .bin file into the flash device starting at --address. Flash must be previously erased\n");
+    printf("    --verify          <binfile>     Verify the flash contents starting at --address against .bin file \n");
+    printf("    --address         <address>     Starting Address in flash used for --program, --verify and --config, default 0\n");
+    printf("    --select_flash      0 | 1       SB-852 only, Set the SB-852 to use flash device 0 or 1\n");
+    printf("    --config                        Issue IPROG command to cause the FPGA to reconfigure from flash starting at --address.  \n");
+    printf("    --cpldregs                      SB-852 only, Read and print all of the SB-852 CPLD registers\n");
+    printf("\n");
+}
+
+/*
+ * Parse the command-line arguments
+ */
+int ParseArgs(int argc, char* argv[], char** bitfile, uint32_t *device_id, uint32_t* model, uint32_t* readid, uint32_t* write_read_test, 
+              uint32_t* read, uint32_t* address, uint32_t* size, uint32_t* erase, uint32_t* erase_half, char** progbitfile, char** verifybitfile, uint32_t* verifyaddress, 
+              uint32_t* select_flash, uint32_t* config, uint32_t* cpldregs) {
+
+    // for parsing the command-line options
+    int         c;
+    int         option_index;
+
+    // flags for the command-line options
+    int         readid_flag          = 0;
+    int         write_read_test_flag = 0;
+    int         erase_flag           = 0;
+    int         config_flag          = 0;
+    int         cpldregs_flag        = 0;
+
+    // these are the command-line options
+    static struct option long_options[] = {
+        // these options set a flag
+        {"readid",          no_argument,       &readid_flag,            1},
+        {"write_read_test", no_argument,       &write_read_test_flag,   1},
+        {"erase",           no_argument,       &erase_flag,             1},
+        {"config",          no_argument,       &config_flag,            1},
+        {"cpldregs",        no_argument,       &cpldregs_flag,          1},
+
+        // these options are distinguished by their indices
+        {"b",               required_argument,  0,         'b'},
+        {"m",               required_argument,  0,         'm'},
+        {"read",            required_argument,  0,         'r'},
+        {"size",            required_argument,  0,         's'},
+        {"program",         required_argument,  0,         'p'},
+        {"verify",          required_argument,  0,         'v'},
+        {"address",         required_argument,  0,         'a'},
+        {"select_flash",    required_argument,  0,         'f'},
+        {"erase_half",      required_argument,  0,         'e'},
+        {"device_id",       required_argument,  0,         'd'},
+        {0,                 0,                  0,          0}
+    };
+
+    // parse the command-line arguments
+    while ((c = getopt_long(argc, argv, "", long_options, &option_index)) != -1) {
+        switch (c) {
+           // these options set a flag - no further processing required
+            case 0:
+                break;
+            // these options have arguments
+            case 'b':
+                *bitfile    = optarg;
+                if (*model != 0){
+                    fprintf(stderr, "Cannot specify both a bitfile and a model; i.e. --b argument is exclusive with --m argument.\n");
+                    return -1;
+                }
+                break;
+            case 'm':
+                *model        = strtoul(optarg, NULL, 0);
+                if (*bitfile != NULL){
+                    fprintf(stderr, "Cannot specify both a bitfile and a model; i.e. --b argument is exclusive with --m argument.\n");
+                    return -1;
+                }
+                break;
+            case 'd':
+                *device_id      = strtoul(optarg, NULL, 0);
+                break;
+            case 'r':
+                *read         = 1;
+                *address      = strtoul(optarg, NULL, 0);
+                if (*address != (*address & 0xFFFFFF00)) { 
+                    *address  = (*address & 0xFFFFFF00);    // round to 256-byte address
+                    printf("Using address = %08xh\n", *address);
+                }
+                break;
+            case 's':
+                *size      = strtoul(optarg, NULL, 0);
+                if (*size != (*size & 0xFFFFFF00)) { 
+                    *size  = (*size & 0xFFFFFF00);    // round to multiple of 256-bytes 
+                    if (*size != 0) {
+                        printf("Using size    = %xh\n", *size);
+                    }
+                }
+                if (*size == 0) { 
+                    *size  = 0x100;    // use 256-bytes min
+                    printf("Using size = %8xh\n", *size);
+                }
+                break;
+            case 'p':
+                *progbitfile  = optarg;
+                break;
+            case 'v':
+                *verifybitfile  = optarg;
+                break;
+            case 'a':
+                *verifyaddress  = strtoul(optarg, NULL, 0);
+                break;
+            case 'f':
+                *select_flash = strtoul(optarg, NULL, 0);
+                if ((*select_flash != 0) && (*select_flash != 1)) {
+                    printf("Invalid value for --select_flash, must be 0 or 1 \n");
+                }
+                break;
+            case 'e':
+                *erase_half = strtoul(optarg, NULL, 0);
+                if ((*erase_half != 0) && (*erase_half != 1)) {
+                    printf("Invalid value for --erase_half, must be 0 or 1 \n");
+                }
+                break;
+        }
+    }
+
+    // check command-line arguments
+
+    // set the options for the flags that are set 
+    if (readid_flag)          *readid          = 1;
+    if (write_read_test_flag) *write_read_test = 1;
+    if (erase_flag)           *erase           = 1;
+    if (config_flag)          *config          = 1;
+    if (cpldregs_flag)        *cpldregs        = 1;
+
+    return 0;
+}
+
+int main(int argc, char* argv[])
+{
+    int         err;
+    char        ibuf[1024];
+
+    char*       bitFileName     = NULL;
+    uint32_t    device_id         = 0xFF ;
+    uint32_t    model           = 0;
+    uint32_t    readid          = 0;
+    uint32_t    write_read_test = 0;
+    uint32_t    read            = 0;
+    uint32_t    address         = 0;
+    uint32_t    size            = 0x100;  // size in bytes for the read command
+    uint32_t    erase           = 0;
+    uint32_t    erase_half      = 0xFF;  // 0-erase low half of flash device, 1-erase high half
+    char*       progbitfile     = NULL;
+    char*       verifybitfile   = NULL;
+    uint32_t    verifyaddress   = 0;
+    uint32_t    select_flash    = 0xFF;  // 0-select flash device 0, 1-select flash device 1
+    uint32_t    config          = 0;  
+    uint32_t    cpldregs        = 0;  
+    uint32_t    cpld_reg_addr[] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x12, 0x13, 0x14, 0x15,
+                                   0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1e, 0x20, 0x21, 0x22, 0x30, 0x31, 0x32, 0x33, 0x34,
+                                   0x35, 0x36, 0x37, 0x38, 0x39, 0x3C, 0x3D, 0x3E, 0x3F, 0x3A, 0x3B, 0x3C, 0x40, 0x41, 0x41, 0x48, 0x49,
+                                   0x4A, 0x4B, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x70, 0x71, 0x72, 0x73, 0x75, 0x76, 0xA0,
+                                   0xA1, 0xA8, 0xA9, 0xB0 };
+    uint32_t    tmp             = 0;
+    uint64_t    data64;
+
+    size_t	    bufsize = 8 * sizeof(uint32_t);  // 32-byte read and write buffers for stream commands to flash controller
+
+    ////////////////////////////////
+    // PARSE COMMAND-LINE OPTIONS //
+    ////////////////////////////////
+    if ((err = ParseArgs(argc, argv, &bitFileName, &device_id, &model, &readid, &write_read_test, &read, &address, &size, &erase, &erase_half, &progbitfile, 
+                         &verifybitfile, &verifyaddress, &select_flash, &config, &cpldregs)) < 0){
+        PrintUsage();
+        return EXIT_FAILURE;
+    }
+    
+    // The RunBitFile function will locate a Pico card that can run the given bit file, and is not already
+    // opened in exclusive-access mode by another program. It requests exclusive access to the Pico card
+    // so no other programs will try to reuse the card and interfere with us.
+    if (bitFileName != NULL) {
+
+
+        if (device_id != 0xFF) {
+    
+            int cardNum = device_id ;
+            printf("Loading FPGA %i with '%s' ...\n", cardNum, bitFileName);
+            pico = new PICODRV(cardNum);
+            err = pico->LoadFPGA(bitFileName);
+            if (err < 0) {
+                fprintf(stderr, "LoadFPGA error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+                exit(1);
+            }
+        }
+        else {
+
+            printf("Loading FPGA with '%s' ...\n", bitFileName);
+            err = RunBitFile(bitFileName, &pico);
+            if (err < 0) {
+                // We use the PicoErrors_FullError function to decipher error codes from RunBitFile.
+                // This is more informative than just printing the numeric code, since it can report the name of a
+                //   file that wasn't found, for example.
+                fprintf(stderr, "%s: RunBitFile error: %s\n", argv[0], PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+                exit(-1);
+            }
+        }
+        //model = 0x851;  // temporary, uncomment for software sim only since software sim requires --b 
+    } else if (model != 0) { 
+        printf("Finding an FPGA matching model = 0x%0X\n", model);
+        if ((err = FindPico(model, &pico)) < 0) {
+            printf("Could not find an FPGA matching 0x%0X \n", model);
+            exit(-1);
+        }
+    } else if (device_id != 0xFF) {
+        int cardNum = device_id ;
+        pico = new PICODRV(cardNum);
+
+    } else {
+        PrintUsage();  
+        exit(-1);
+    }
+
+    // Data goes out to the Flash Controller on WriteStream ID 115 and comes back to the host on ReadStream ID 115
+    // Function call to CreateStream opens stream ID 115
+    printf("Opening streams to and from the Flash Controller\n");
+    streamhandle = pico->CreateStream(115);   // FLASH_CTL_STREAM_ID = 115
+    if (streamhandle < 0) {
+        // All functions in the Pico API return an error code.  If that code is < 0, then you should use
+        // the PicoErrors_FullError function to decode the error message.
+        fprintf(stderr, "%s: CreateStream error: %s\n", argv[0], PicoErrors_FullError(streamhandle, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Pico streams come in two flavors: 4Byte wide, 16Byte wide (equivalent to 32bit, 128bit respectively)
+    // However, all calls to ReadStream and WriteStream must be 16Byte aligned (even for 4B wide streams)
+    // Allocate 16B aligned space for read and write stream buffers
+    // Similarily, the size of the call, in bytes, must be a multiple of 16Bytes. We will always use 16 or 32 bytes
+
+    if (malloc) {
+       err = posix_memalign((void**)&writebuf, 16, bufsize);
+       if (writebuf == NULL || err) {
+          fprintf(stderr, "%s: posix_memalign could not allocate array of %d bytes for write buffer\n", argv[0], (int) bufsize);
+          exit(-1);
+       }
+       err = posix_memalign((void**)&readbuf, 16, bufsize);
+       if (readbuf == NULL || err) {
+          fprintf(stderr, "%s: posix_memalign could not allocate array of %d bytes for read buffer\n", argv[0], (int) bufsize);
+          exit(-1);
+       }
+    } else {
+       printf("%s: malloc failed\n", argv[0]);
+       exit(-1);
+    }
+
+    // Assert PicoRst which goes to the mgmt_qspi_flash_ctl module
+    // We don't really need this.  s_rst also resets the mgmt_qspi_flash_ctl module
+    // and the flash chip itself does not have a reset pin on the AC-510 and the 
+    // reset pin on the SB-852 comes from the CPLD.
+    // PicoRst stops the clock to the flash
+    soft_reset();
+
+    printf("\n Beginning Flash Operations\n");
+
+    init_flash();
+
+    if (readid) {
+        read_id_reg(); 
+    }
+
+    if (write_read_test) { 
+        quad_256byte_write_read_test();                
+    } 
+
+    if (erase) {
+        erase_flash();
+    } 
+
+    if ((erase_half == 0) || (erase_half == 1)) {     
+        erase_half_flash(erase_half);
+    } 
+
+    if (progbitfile != NULL) {
+        program_flash(progbitfile, verifyaddress, model);
+    }
+
+    if (verifybitfile != NULL) {
+        verify_flash(verifybitfile, verifyaddress);
+    }
+
+    if (read) {
+        read_flash(size, address);
+    }
+  
+    if ((select_flash == 0) || (select_flash == 1)) {     // Do a read-modify-write and don't change any other bits besides bit 3
+        tmp = i2c_read(SB852_DEVICE_ADDRESS, SB852_CPLD_FPGA_CONFIG_REG, 1, 1);
+        printf("SB852_CPLD_FPGA_CONFIG_REG = %02X \n", tmp);
+        tmp = tmp & 0xF7;                // clear bit 3, the FPGA_IMG_SEL bit
+        tmp = tmp | (select_flash<<3);   // set bit 3, the FPGA_IMG_SEL bit to the value entered
+        printf("Setting the SB-852 CPLD bit FPGA_IMG_SEL to %d to select flash device %d \n", select_flash, select_flash);
+        i2c_write(SB852_DEVICE_ADDRESS, SB852_CPLD_FPGA_CONFIG_REG, 1, tmp, 1);
+        tmp = i2c_read(SB852_DEVICE_ADDRESS, SB852_CPLD_FPGA_CONFIG_REG, 1, 1);
+        printf("SB852_CPLD_FPGA_CONFIG_REG = %02X \n", tmp);
+    }
+
+    if (config) {
+        // Reset the flash volatile configuration register, enhanced volatile configuration register
+        // and extended address register to the power-on reset default condition so that FPGA
+        // configuration from flash works right.
+        reset_flash();
+
+        // Assert IPROG or internal PROGRAM_B command through the ICAP3E module
+        printf("Issuing IPROG command to reload the FPGA from flash address %08X\n", verifyaddress);
+
+        // ICAP module interface is at 4000h. Writing 4000h will cause the 
+        // firmware to issue this sequence:
+        // 0xFFFFFFFF   Dummy word
+        // 0xAA995566)  Sync word
+        // 0x20000000)  Type 1 NOOP
+        // 0x30020001)  Type 1 Write 1 word to WBSTAR
+        //   address )  Warm boot start address in the flash
+        // 0x30008001)  Type 1 Write 1 words to CMD
+        // 0x0000000F)  IPROG command 
+        // 0x20000000)  Type 1 NOOP
+
+        picobus_wr(0x4000, verifyaddress);  // Send IPROG to trigger reconfiguration from flash at verifyaddress
+
+        //data64 = picobus_rd(0x3000);      // temporary.  Uncomment for software simulation so the sim does not exit before the write is done.
+
+        // exit here because FPGA is going to go dead
+        exit(0);
+
+
+/*      // Optional config from flash methods for SB-851 and SB-852
+        // The iPROG command works for all Xilinx FPGAs so that is the primary method and this 
+        // code is just left for reference.
+        if (model == 0x851) {   // Assert PROGRAM_B through the program_b_register at 3000h in the StreamToFlashCtl module
+            printf("Reading SB851 PROGRAM_B register \n");
+            data64 = picobus_rd(0x3000); // program_b register is at 3000h
+            printf("SB851 PROGRAM_B register = %X \n", data64);
+
+            printf("Setting the SB-851 PROGRAM_B register to 0 to reload the FPGA from flash \n");
+            picobus_wr(0x3000, 0x0); // program_b register is at 3000h
+
+            printf("Reading SB851 PROGRAM_B register, it automatically returns high \n");
+            data64 = picobus_rd(0x3000); // program_b register is at 3000h
+            printf("SB851 PROGRAM_B register = %X \n", data64);
+        }
+
+        if (model == 0x852) {  // Assert PROGRAM_B through the CPLD 
+            tmp = i2c_read(SB852_DEVICE_ADDRESS, SB852_CPLD_TRIG_RESET_FPGA_PROG_REG, 1, 1);
+            printf("SB852_CPLD_TRIG_RESET_FPGA_PROG_REG = %02X \n", tmp);
+            tmp = tmp | 0x01;                // set bit 0, the TRIG_RESET_FPGA_PROG bit
+            printf("Setting the SB-852 CPLD TRIG_RESET_FPGA_PROG bit to 1 to reload the FPGA from flash \n");
+            i2c_write(SB852_DEVICE_ADDRESS, SB852_CPLD_TRIG_RESET_FPGA_PROG_REG, 1, tmp, 1);
+
+            // May need to just exit here since the fpga will go dead, however there is a default
+            // 2 second delay between triggering the reconfiguration and it starting so the 
+            // the software has time to remove the FPGA from the PCIe enumeration.
+            tmp = i2c_read(SB852_DEVICE_ADDRESS, SB852_CPLD_TRIG_RESET_FPGA_PROG_REG, 1, 1);
+            printf("SB852_CPLD_TRIG_RESET_FPGA_PROG_REG = %02X \n", tmp);
+        }
+*/
+    }
+//void i2c_write(uint32_t dev_addr, uint32_t reg_addr, uint32_t addr_bytes, uint32_t wrdata, uint32_t data_bytes);
+
+         //   printf("SB852 CPLD Register %02X = %02X \n", cpld_reg_addr[j], tmp);
+    if (cpldregs) {
+            i2c_write(SB852_DEVICE_ADDRESS, cpld_reg_addr[58], 1, 129, 1);
+            i2c_write(SB852_DEVICE_ADDRESS, cpld_reg_addr[60], 1, 5, 1);
+            i2c_write(0x74, 0x1, 1, 1, 1);//write to switch to page 1
+            i2c_write(0x74, 0x11F, 1, 2, 1);//write for si5341 output 4 to use N0 mux
+            i2c_write(0x74, 0x124, 1, 2, 1);//write for si5341 output 5 to use N0 mux
+            //i2c_write(0x74, 0x11F, 1, 1, 1);//write for si5341 output 4 to use N0 mux
+            //i2c_write(0x74, 0x124, 1, 1, 1);//write for si5341 output 5 to use N0 mux
+	    printf("written to cpld reg \n");
+        for (int j=0; j<2048; j++) {
+           // tmp = i2c_read(SB852_DEVICE_ADDRESS, cpld_reg_addr[j], 1, 1);
+           // printf("SB852 CPLD Register %02X = %02X \n", cpld_reg_addr[j], tmp);
+            tmp = i2c_read(0x74, j,1, 1);
+
+            printf("SI5345 Register %02X = %02X \n", j, tmp);	
+        }
+    }
+
+    // Reset the flash volatile configuration register, enhanced volatile configuration register
+    // and extended address register to the power-on reset default condition.
+    reset_flash();
+
+    // streams are automatically closed when the PicoDrv object is destroyed, or on program termination, but
+    //  we can also close a stream manually.
+    pico->CloseStream(streamhandle);
+
+    printf("All tests successful!\n");
+
+    exit(0);
+}
+
diff --git a/src/qspi_flash_ctl/i2c.cpp b/src/qspi_flash_ctl/i2c.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a1a15e96c0642018959d9ead6d898cac5470a2a3
--- /dev/null
+++ b/src/qspi_flash_ctl/i2c.cpp
@@ -0,0 +1,195 @@
+/*===================================================================================
+ File: i2c.cpp.
+
+ This module contains utilities designed to communicate with the I2C port on pico
+ boards such as teh SB-852.
+ A stream connects to a 64-bit PicoBus that is used to read and write registers in 
+ the I2C master which can then read and write registers in a slave I2C device such
+ as the CPLD on the SB-852. 
+
+ The software sequence for an I2C access is:
+ Write:
+ Poll  I2C_CONFIG_REG, bit[31] will be 1 while the I2C cycle is in progress, 0 when done
+ Write I2C_WRDATA_REG with WriteData[31:0]  // Dont care for reads
+ Write I2C_ADDR_REG   with the I2C Register Address[31:0] being accessed 
+ Write I2C_CONFIG_REG[23:0] with {data_byte_count[7:0], adr_byte_count[7:0], I2C_Device_Addr[6:0], R/Wn}
+
+ Read:
+ Poll  I2C_CONFIG_REG, bit[31] will be 1 while the I2C cycle is in progress, 0 when done
+ Write I2C_ADDR_REG   with the I2C Register Address[31:0] being accessed 
+ Write I2C_CONFIG_REG[23:0] with {data_byte_count[7:0], adr_byte_count[7:0], I2C_Device_Addr[6:0], R/Wn}
+ Poll  I2C_CONFIG_REG, bit[31] will be 1 while the I2C cycle is in progress, 0 when done
+ Read  I2C_RDDATA_REG for the read data, on read cycles only
+=====================================================================================*/
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <getopt.h>
+#include <picodrv.h>
+#include <pico_errors.h>
+
+#include "i2c.h"
+
+// Globals
+extern PicoDrv     *pico;
+extern int         streamhandle;
+extern uint32_t    *readbuf, *writebuf;
+extern int         verbose;
+
+
+// Routine to poll the I2C controller cycle in progress bit until it is deasserted
+void check_i2c_busy() {
+    int     err;
+    char    ibuf[1024];
+
+    writebuf[0] = 0x8;            // Data length in bytes of stream command
+    writebuf[1] = I2C_CONFIG_REG; // Register Address
+    writebuf[2] = 0x1;            // Read cmd, bit[64]=1
+    writebuf[3] = 0x0;            // unused
+    if (verbose) {
+        printf("Checking I2C busy");
+    } 
+
+    do {
+        err = pico->WriteStream(streamhandle, &writebuf[0], 16);
+        if (err < 0) {
+            fprintf(stderr, "check_i2c_busy: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+            exit(-1);
+        }
+        err = pico->ReadStream(streamhandle, readbuf, 16);
+        if (err < 0) {
+            fprintf(stderr, "check_i2c_busy: ReadStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+            exit(-1);
+        }
+        if (verbose) {
+            printf("Register %04X  =  %08X\n", I2C_CONFIG_REG, readbuf[0]);
+        }
+    } while ((readbuf[0] & 0x80000000) == 0x80000000); // loop until I2C cycle in progress bit is low
+}
+
+// I2C write cycle.  Device Address is 7-bits.  Register Address is 0 to 4 bytes.  Write data is 1 to 4 bytes. 
+void i2c_write(uint32_t dev_addr, uint32_t reg_addr, uint32_t addr_bytes, uint32_t wrdata, uint32_t data_bytes) {
+    int err;
+    char        ibuf[1024];
+
+    check_i2c_busy();
+
+    // Write the I2C wrdata to the I2C_WRDATA_REG of the I2C controller through the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = I2C_WRDATA_REG;                     // Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    writebuf[4] = wrdata;                             // lower 32-bits of the PicoBus 64-bit data contains the I2C write data
+    writebuf[5] = 0;                                  // high 32-bits of the PicoBus 64-bit data, unused
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Writing %08X to %08X \n", writebuf[1], writebuf[4]);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "i2c_write: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Write the I2C register address to the I2C_ADDR_REG of the I2C controller through the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = I2C_ADDR_REG;                       // Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    writebuf[4] = reg_addr;                           // lower 32-bits of the PicoBus 64-bit data contains the I2C register address
+    writebuf[5] = 0;                                  // high 32-bits of the PicoBus 64-bit data, unused
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Writing %08X to %08X \n", writebuf[1], writebuf[4]);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "i2c_write: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Write the I2C device address, num address bytes and num data bytes to the I2C_CONFIG_REG of the I2C controller through the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = I2C_CONFIG_REG;                     // Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    // lower 32-bits of the PicoBus 64-bit data contains {data_byte_count[7:0], adr_byte_count[7:0], I2C_Device_Addr[6:0], R/Wn}
+    writebuf[4] = ((data_bytes & 0xFF)<<16) | ((addr_bytes & 0xFF)<<8) | ((dev_addr & 0xFF)<<1) | 0x00;    // R/Wn=0
+    writebuf[5] = 0;                                  // high 32-bits of the PicoBus 64-bit data, unused
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Writing %08X to %08X \n", writebuf[1], writebuf[4]);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "i2c_write: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+}
+
+// I2C read cycle.  Device Address is 7-bits.  Register Address is 0 to 4 bytes.  Write data is 1 to 4 bytes. 
+uint32_t i2c_read(uint32_t dev_addr, uint32_t reg_addr, uint32_t addr_bytes, uint32_t data_bytes) {
+    int err;
+    char        ibuf[1024];
+
+    check_i2c_busy();
+
+    // Write the I2C register address to the I2C_ADDR_REG of the I2C controller through the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = I2C_ADDR_REG;                       // Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    writebuf[4] = reg_addr;                           // lower 32-bits of the PicoBus 64-bit data contains the I2C register address
+    writebuf[5] = 0;                                  // high 32-bits of the PicoBus 64-bit data, unused
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Writing %08X to %08X \n", writebuf[1], writebuf[4]);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "i2c_read: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // Write the I2C device address, num address bytes and num data bytes to the I2C_CONFIG_REG of the I2C controller through the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of PicoBus cycle 
+    writebuf[1] = I2C_CONFIG_REG;                     // Picobus Address
+    writebuf[2] = 0x0;                                // Write cmd, bit[64] = 0
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    // lower 32-bits of the PicoBus 64-bit data contains {data_byte_count[7:0], adr_byte_count[7:0], I2C_Device_Addr[6:0], R/Wn}
+    writebuf[4] = ((data_bytes & 0xFF)<<16) | ((addr_bytes & 0xFF)<<8) | ((dev_addr & 0xFF)<<1) | 0x01;    // R/Wn=1
+    writebuf[5] = 0;                                  // high 32-bits of the PicoBus 64-bit data, unused
+    writebuf[6] = 0x0;                                // unused upper portion of stream data word
+    writebuf[7] = 0x0;                                // unused upper portion of stream data word
+    if (verbose) printf("Writing %08X to %08X \n", writebuf[1], writebuf[4]);
+    err = pico->WriteStream(streamhandle, &writebuf[0], 32);  // Stream write of 32 bytes, two 128-bit words
+    if (err < 0) {
+        fprintf(stderr, "i2c_read: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    check_i2c_busy();
+
+    // Send I2C controller read data command to the stream interface
+    writebuf[0] = 0x8;                                // Data length in bytes of stream command
+    writebuf[1] = I2C_RDDATA_REG;                     // Picobus Address
+    writebuf[2] = 0x1;                                // Read cmd, bit[64] = 1
+    writebuf[3] = 0x0;                                // unused upper portion of stream command word
+    err = pico->WriteStream(streamhandle, &writebuf[0], 16);
+    if (err < 0) {
+        fprintf(stderr, "i2c_read: WriteStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    // clear the read buffer to prepare for the read.
+    memset(readbuf, 0, sizeof(readbuf));
+
+    // Read the response data from the stream interface.
+    err = pico->ReadStream(streamhandle, readbuf, 16);
+    if (err < 0) {
+        fprintf(stderr, "flash_ctl_rd: ReadStream error: %s\n", PicoErrors_FullError(err, ibuf, sizeof(ibuf)));
+        exit(-1);
+    }
+
+    return (readbuf[0]);
+}
+
diff --git a/src/qspi_flash_ctl/i2c.h b/src/qspi_flash_ctl/i2c.h
new file mode 100644
index 0000000000000000000000000000000000000000..f9434f7f3c26922129dc36688ba90e75d5618f26
--- /dev/null
+++ b/src/qspi_flash_ctl/i2c.h
@@ -0,0 +1,35 @@
+/*
+ * File Name    : i2c.h
+ *
+ * Description  : i2c task headers
+ *
+ * Copyright    : 2018, Micron, Inc.
+ */
+
+// PicoBus Addresses of I2C controller registers
+#define I2C_BASE 0x2000
+
+#define I2C_CONFIG_REG (I2C_BASE + 0x00)
+#define I2C_ADDR_REG (I2C_BASE + 0x08)
+#define I2C_WRDATA_REG (I2C_BASE + 0x10)
+#define I2C_RDDATA_REG (I2C_BASE + 0x18)
+
+// SB-852 CPLD I2C device address
+#define SB852_DEVICE_ADDRESS 0x5F
+
+// SB-852 I2C register addresses inside the CPLD
+#define SB852_CPLD_FPGA_CONFIG_REG \
+  0x12  // Bits: 7-NO_CFG_TIMEOUT, 6-FORCE_PCIE_RESET, 5-RESET_FPGA_CFG_FLASH,
+        // 4-FPGA_IMG_LOADED, 3-FPGA_IMG_SEL, 2:0-FPGA_CFG_MODE
+#define SB852_CPLD_TRIG_RESET_FPGA_PROG_REG \
+  0xB0  // Bits: 7:4-reserved, 3:1-TRIG_RESET_FPGA_PROG_SEC,
+        // 0-TRIG_RESET_FPGA_PROG
+
+#define SI5345_REG 0x5D
+// I2C software functions
+void check_i2c_busy();
+
+void i2c_write(uint32_t dev_addr, uint32_t reg_addr, uint32_t addr_bytes, uint32_t wrdata,
+               uint32_t data_bytes);
+
+uint32_t i2c_read(uint32_t dev_addr, uint32_t reg_addr, uint32_t addr_bytes, uint32_t data_bytes);
diff --git a/src/scdaq.cc b/src/scdaq.cc
index 922af7d57e9f85084d74dc5e068bba4ed8e121de..79fa9c1a0d6c4993874d65704f13b2aed0775306 100644
--- a/src/scdaq.cc
+++ b/src/scdaq.cc
@@ -11,6 +11,7 @@
 #include "DmaInputFilter.h"
 #include "FileDmaInputFilter.h"
 #include "InputFilter.h"
+#include "MicronDmaInputFilter.h"
 #include "WZDmaInputFilter.h"
 #include "config.h"
 #include "controls.h"
@@ -56,6 +57,10 @@ int run_pipeline(int nbThreads, ctrl &control, config &conf) {
     // Create WZ DMA reader
     input_filter = std::make_shared<WZDmaInputFilter>(packetBufferSize, nbPacketBuffers, control);
 
+  } else if (input == config::InputType::MICRONDMA) {
+    // create MicronDmaInputFilter reader
+    input_filter =
+        std::make_shared<MicronDmaInputFilter>(packetBufferSize, nbPacketBuffers, control, conf);
   } else {
     throw std::invalid_argument("Configuration error: Unknown input type was specified");
   }
diff --git a/src/wz_dma.h b/src/wz_dma.h
index 7a31efd2a99469303a5a2b5e779b0ba0b1b049da..a2a8d3287b0f3442eea67d5c0197bff8044d08c9 100644
--- a/src/wz_dma.h
+++ b/src/wz_dma.h
@@ -1,8 +1,8 @@
 #ifndef WZ_XDMA_H
 #define WZ_XDMA_H
 
+#include "stdint.h"
 #include "wzdma/xdma-ioctl.h"
-
 #define TOT_BUF_LEN ((int64_t)WZ_DMA_BUFLEN * (int64_t)WZ_DMA_NOFBUFS)
 
 struct wz_private {
diff --git a/src/wzdma/wz-xdma-ioctl.h b/src/wzdma/wz-xdma-ioctl.h
index 835906c964b4f02b27ef3a0d21e96d08f5a0d0f1..a7bdb6f647922da45c80ab9150f8599a4893809d 100644
--- a/src/wzdma/wz-xdma-ioctl.h
+++ b/src/wzdma/wz-xdma-ioctl.h
@@ -1,4 +1,8 @@
+#include <sys/types.h>
+#include <unistd.h>
 #include <wz-xdma-consts.h>
+
+#include "stdint.h"
 #ifndef WZ_XDMA_IOCTL_H
 #define WZ_XDMA_IOCTL_H 1
 // Structures used to notify the application about the received block of data
diff --git a/test/config/scdaq-calo.conf b/test/config/scdaq-calo.conf
index 9a8b8ac7f6e5d22b7655ceaf497db1ea97487344..20d074b61a5026339918364a1eae22106e4cba22 100644
--- a/test/config/scdaq-calo.conf
+++ b/test/config/scdaq-calo.conf
@@ -8,8 +8,15 @@
 #   "wzdma"     for DMA driver from Wojciech M. Zabolotny
 #   "dma"       for XILINX DMA driver
 #   "filedma"   for reading from file and simulating DMA
+#   "micronDMA" for PICO driver
 #
-input:filedma
+input:fit/config/scdaq-calo.confedma
+
+# Settings for Micron board only
+loadBitFile:yes
+
+# ONLY for Micron Board/DMA, must be in tgz format, produced by pico utility package_flash
+bitFileName:/home/tjames/Pico_Toplevel-bril-250722-1.tgz
 
 ## Settings for DMA input
 
@@ -17,7 +24,7 @@ input:filedma
 dma_dev:/dev/xdma0_c2h_0
 
 # Max received packet size in bytes (buffer to reserve)
-dma_packet_buffer_size:1048576
+dma_packet_buffer_size:2097152
 
 # Number of packet buffers to allocate
 dma_number_of_packet_buffers:1000
@@ -49,6 +56,7 @@ enable_stream_processor:yes
 #   "PASS_THROUGH"
 #   "GMT"
 #   "CALO"
+#   "BRIL"
 # Note: When changing the processing type, change also "output_filename_prefix"
 # in the file output section.
 #
diff --git a/test/config/scdaq-gmt.conf b/test/config/scdaq-gmt.conf
index 04a0e114900b9ad9380b24472bfd389e1d034aab..28feed9476fdfb86752f3450f72ef396ce3d61f8 100644
--- a/test/config/scdaq-gmt.conf
+++ b/test/config/scdaq-gmt.conf
@@ -8,16 +8,23 @@
 #   "wzdma"     for DMA driver from Wojciech M. Zabolotny
 #   "dma"       for XILINX DMA driver
 #   "filedma"   for reading from file and simulating DMA
+#   "micronDMA" for PICO driver
 #
 input:filedma
 
+# Settings for Micron board only
+loadBitFile:yes
+
+# ONLY for Micron Board/DMA, must be in tgz format, produced by pico utility package_flash
+bitFileName:/home/tjames/Pico_Toplevel-bril-250722-1.tgz
+
 ## Settings for DMA input
 
 # DMA device
 dma_dev:/dev/xdma0_c2h_0
 
 # Max received packet size in bytes (buffer to reserve)
-dma_packet_buffer_size:1048576
+dma_packet_buffer_size:2097152
 
 # Number of packet buffers to allocate
 dma_number_of_packet_buffers:1000
diff --git a/test/config/scdaq-micron-bril.conf b/test/config/scdaq-micron-bril.conf
new file mode 100644
index 0000000000000000000000000000000000000000..e517459535ec43165c2ce4c733ff2d3a79fa24ce
--- /dev/null
+++ b/test/config/scdaq-micron-bril.conf
@@ -0,0 +1,105 @@
+################################################################################
+##
+## Input settings
+##
+################################################################################
+
+# Input settings, allowed values are:
+#   "wzdma"     for DMA driver from Wojciech M. Zabolotny
+#   "dma"       for XILINX DMA driver
+#   "filedma"   for reading from file and simulating DMA
+#   "micronDMA" for PICO driver
+#
+input:micronDMA
+
+# Settings for Micron board only
+loadBitFile:yes
+
+# ONLY for Micron Board/DMA, must be in tgz format, produced by pico utility package_flash
+bitFileName:/home/tjames/Pico_Toplevel-bril-250722-1.tgz
+
+## Settings for DMA input
+
+# DMA device
+dma_dev:/dev/xdma0_c2h_0
+
+# Max received packet size in bytes (buffer to reserve)
+dma_packet_buffer_size:2097152
+
+# Number of packet buffers to allocate
+dma_number_of_packet_buffers:1000
+
+# Print report each N packets, use 0 to disable
+packets_per_report:1
+
+# number of orbits per DMA packet, in decimal
+NOrbitsPerDMAPacket:20
+
+# prescale factor applied to *calo* data only
+prescale_factor:20
+
+## Extra settings for "filedma" input
+
+#input_file:/dev/shm/testdata.bin
+input_file:test/data/gmt_testfile.dat
+
+################################################################################
+##
+## Stream processor settings
+##
+################################################################################
+
+enable_stream_processor:yes
+
+# Define processing type (unpacking), allowed values are:
+#   "PASS_THROUGH"
+#   "GMT"
+#   "CALO"
+#   "BRIL"
+# Note: When changing the processing type, change also "output_filename_prefix"
+# in the file output section.
+#
+processor_type:BRIL
+
+# Enable software zero-supression
+doZS:yes
+
+################################################################################
+##
+## File output settings
+##
+################################################################################
+
+output_filename_prefix:scout_BRIL
+
+output_filename_base:test/data
+#output_filename_base:/fff/BU0/ramdisk/scdaq
+
+max_file_size:8589934592
+
+# Always write data to a file regardless of the run status, useful for debugging
+output_force_write:yes
+
+
+################################################################################
+##
+## Elastics processor settings (obsolete)
+##
+################################################################################
+
+enable_elastic_processor:no
+
+port:8000
+elastic_url:http://something.somewhere
+pt_cut:7
+quality_cut:12
+
+
+################################################################################
+##
+## SCDAQ Generic Settings
+##
+################################################################################
+
+# Pipeline settings
+threads:1