diff --git a/Control/CxxUtils/CxxUtils/vec.h b/Control/CxxUtils/CxxUtils/vec.h
index 37c09fa75e0921473e28a14345a6f651330f7425..0f536c8d43770204b4e86bdddbc16f66f3a51afb 100644
--- a/Control/CxxUtils/CxxUtils/vec.h
+++ b/Control/CxxUtils/CxxUtils/vec.h
@@ -24,16 +24,35 @@
  * attribute is supported or a fallback C++ class intended to be
  * (mostly) functionally equivalent.
  *
- * We also support these additional operations:
+ * We also support additional operations.
  *
+ * Deducing useful types:
  *  - @c CxxUtils::vec_type_t<VEC> is the element type of @c VEC.
+ *  - @c CxxUtils::mask_type_t<VEC> is the vector type return by relational
+ *                                  operations.
+ *
+ * Deducing the num of elements in a vectorized type:
  *  - @c CxxUtils::vec_size<VEC>() is the number of elements in @c VEC.
  *  - @c CxxUtils::vec_size(const VEC&) is the number of elements in @c VEC.
+ *
+ * Methods providing similar functionality to certain x86-64 SIMD intrinics
  *  - @c CxxUtils::vbroadcast (VEC& v, T x) initializes each element of
  *                                          @c v with @c x.
+ *  - @c CxxUtils::vload (VEC& dst, vec_type_t<VEC>* mem_addr)
+ *                                          loads elements from @c mem_addr
+ *                                          to @c dst
+ *  - @c CxxUtils::vstore (vec_type_t<VEC>* mem_addr, VEC& src)
+ *                                          stores elements from @c src
+ *                                          to @c mem_addr
+ *  - @c CxxUtils::vselect (VEC& dst, const VEC& a, const VEC& b, const
+ *                          mask_type_t<VEC>& mask)
+ *                          copies elements from @c a or @b, depending
+ *                          on the value @c of mask to @c dst.
+ *  - @c CxxUtils::vmin   (VEC& dst, const VEC& a, const VEC& b)
+ *                         copies to @c dst[i]  the min(a[i],b[i])
+ *
  */
 
-
 #ifndef CXXUTILS_VEC_H
 #define CXXUTILS_VEC_H
 
@@ -41,6 +60,7 @@
 #include "CxxUtils/features.h"
 #include "boost/integer.hpp"
 #include <cstdlib>
+#include <cstring>
 #include <initializer_list>
 #include <algorithm>
 #include <type_traits>
@@ -177,7 +197,7 @@ UNOP(~)
 
 
 #undef UNOP
-  
+
 
 // Define relational operations.
 
@@ -330,13 +350,30 @@ constexpr size_t vec_size(const VEC&)
   return sizeof(VEC) / sizeof(ELT);
 }
 
+/**
+ * brief Deduce the type of a mask , type returned by relational operations,
+ * for a vectorized type.
+ */
+template<class VEC>
+struct mask_type
+{
+  static auto maskt(const VEC& v1, const VEC& v2) -> decltype(v1 < v2);
+  typedef
+    typename std::invoke_result<decltype(maskt), const VEC&, const VEC&>::type
+      type1;
+  typedef std::remove_cv_t<std::remove_reference_t<type1>> type;
+};
+template<class VEC>
+/// Deduce the mask type for a vectorized type.
+using mask_type_t = typename mask_type<VEC>::type;
 
 /**
  * brief Copy a scalar to each element of a vectorized type.
+ * Similar functionality to _mm_set/_mm_broadcast x86-64 intrinsics.
  */
-template <typename VEC, typename T>
-inline
-void vbroadcast (VEC& v, T x)
+template<typename VEC, typename T>
+inline void
+vbroadcast(VEC& v, T x)
 {
   // This may look inefficient, but the loop goes away when we
   // compile with optimization.
@@ -346,6 +383,66 @@ void vbroadcast (VEC& v, T x)
   }
 }
 
+/*
+ * @brief load elements from  memory address (C-array)
+ * to a vectorized type. Similar to _mm_load intrinsics
+ */
+template<typename VEC>
+inline void
+vload(VEC& dst, vec_type_t<VEC>* mem_addr)
+{
+  std::memcpy(&dst, mem_addr, sizeof(VEC));
+}
+
+/*
+ * @brief load elements from a vectorized type to
+ * a memory address (C-array).
+ * Similar to _mm_store intrinsics
+ */
+template<typename VEC>
+inline void
+vstore(vec_type_t<VEC>* mem_addr, VEC& src)
+{
+  std::memcpy(mem_addr, &src, sizeof(VEC));
+}
+
+/*
+ * @brief select/blend function.
+ * Similar _mm_blend X86-64 intrinsics
+ */
+template<typename VEC>
+inline void
+vselect(VEC& dst, const VEC& a, const VEC& b, const mask_type_t<VEC>& mask)
+{
+#if (defined(__clang__) && (__clang_major__ < 10)) ||                          \
+  !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
+  constexpr size_t N = vec_size<VEC>();
+  for (size_t i = 0; i < N; i++) {
+    dst[i] = mask[i] ? a[i] : b[i];
+  }
+#else
+  dst = mask ? a : b;
+#endif
+}
+
+/*
+ * @brief vectorized min.
+ * Similar to _mm_min intrinsics
+ */
+template<typename VEC>
+inline void
+vmin(VEC& dst, const VEC& a, const VEC& b)
+{
+#if (defined(__clang__) && (__clang_major__ < 10)) ||                          \
+  !HAVE_VECTOR_SIZE_ATTRIBUTE || WANT_VECTOR_FALLBACK
+  constexpr size_t N = vec_size<VEC>();
+  for (size_t i = 0; i < N; i++) {
+    dst[i] = a[i] < b[i] ? a[i] : b[i];
+  }
+#else
+  dst = a < b ? a : b;
+#endif
+}
 
 } // namespace CxxUtils
 
diff --git a/Control/CxxUtils/test/vec_test.cxx b/Control/CxxUtils/test/vec_test.cxx
index 277b0877bb79cd038f6f5459825df25d6d78f541..1fde7ecb02d3c5011dd2bca7240038c6f4e0dcda 100644
--- a/Control/CxxUtils/test/vec_test.cxx
+++ b/Control/CxxUtils/test/vec_test.cxx
@@ -22,7 +22,7 @@
 #include <type_traits>
 #include <limits>
 
-
+#include <array>
 #include <valarray>
 
 
@@ -269,6 +269,66 @@ void test_broadcast (const VEC& v1)
   }
 }
 
+template<class VEC>
+void
+test_storeload(const VEC& v1)
+{
+
+  std::array<CxxUtils::vec_type_t<VEC>, CxxUtils::vec_size<VEC>()> buffer{};
+  CxxUtils::vec_type_t<VEC>* mem_addr = buffer.data();
+
+  CxxUtils::vstore(mem_addr, v1);
+  size_t N = CxxUtils::vec_size<VEC>();
+  for (size_t i = 0; i < N; i++) {
+    assert(v1[i] == mem_addr[i]);
+  }
+
+  VEC v2;
+  CxxUtils::vload(v2, mem_addr);
+  for (size_t i = 0; i < N; i++) {
+    assert(v2[i] == mem_addr[i]);
+  }
+}
+
+template<class VEC>
+void
+test_select(const VEC& v1)
+{
+
+  const VEC v2 = 2 * v1;
+  CxxUtils::mask_type_t<VEC> greater;
+  CxxUtils::mask_type_t<VEC> less;
+  size_t N = CxxUtils::vec_size<VEC>();
+  for (size_t i = 0; i < N; i++) {
+    greater[i] = v1[i] > v2[i];
+    less[i] = v1[i] < v2[i];
+  }
+
+  VEC selectGreater;
+  CxxUtils::vselect(selectGreater, v1, v2, greater);
+  VEC selectLess;
+  CxxUtils::vselect(selectLess, v1, v2, less);
+
+  for (size_t i = 0; i < N; i++) {
+    assert(selectGreater[i] == v2[i]);
+    assert(selectLess[i] == v1[i]);
+  }
+}
+
+template<class VEC>
+void
+test_min(const VEC& v1)
+{
+
+  const VEC v2 = v1 + 1;
+
+  VEC min;
+  CxxUtils::vmin(min, v1, v2);
+  size_t N = CxxUtils::vec_size<VEC>();
+  for (size_t i = 0; i < N; i++) {
+    assert(min[i] == v1[i]);
+  }
+}
 
 template <template <class T, size_t N> class VEC>
 void test1a()
@@ -279,13 +339,16 @@ void test1a()
   assert (CxxUtils::vec_size(v) == 4);
 
 #define ELT(r,data,elem) elem,
-#define INITN(N, ...) { BOOST_PP_LIST_FOR_EACH(ELT, _, BOOST_PP_LIST_FIRST_N(N, BOOST_PP_VARIADIC_TO_LIST(__VA_ARGS__)))  } 
+#define INITN(N, ...) { BOOST_PP_LIST_FOR_EACH(ELT, _, BOOST_PP_LIST_FIRST_N(N, BOOST_PP_VARIADIC_TO_LIST(__VA_ARGS__)))  }
 
 #define TEST_FLOAT(T, N)                                                \
   do {                                                                  \
     test_arith (VEC<T, N> INITN(N, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5)); \
     test_relops (VEC<T, N> INITN(N, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5)); \
     test_broadcast (VEC<T, N> INITN(N, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5)); \
+    test_storeload (VEC<T, N> INITN(N, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5)); \
+    test_select (VEC<T, N> INITN(N, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5)); \
+    test_min (VEC<T, N> INITN(N, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5)); \
   } while(0)
 
   TEST_FLOAT(float, 1);
@@ -303,6 +366,9 @@ void test1a()
     test_arith (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
     test_relops (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
     test_broadcast (VEC<T, N> INITN(N, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3)); \
+    test_storeload (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
+    test_select (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
+    test_min (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
     test_int (VEC<T, N> INITN(N, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); \
     test_logops (VEC<T, N> INITN(N, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1)); \
   } while(0)