From 3cc05549d00e7f3207b9227bf4cc85f30c9b5012 Mon Sep 17 00:00:00 2001 From: Giovanna Lazzari Miotto <giovanna.lazzari.miotto@cern.ch> Date: Thu, 20 Mar 2025 13:07:58 +0100 Subject: [PATCH] nmc,cmake: Include micron NMC headers --- CMakeLists.txt | 2 + src/micron/nmc.h | 246 ++++ src/micron/nmc.hpp | 1023 +++++++++++++ src/micron/nmc_errno.h | 54 + src/micron/nmc_sync.h | 61 + src/micron/nmc_sync.hpp | 55 + src/micron/nmc_te.hpp | 1692 +++++++++++++++++++++ src/micron/nmc_te_intrin.h | 2856 ++++++++++++++++++++++++++++++++++++ src/micron/nmc_types.h | 166 +++ 9 files changed, 6155 insertions(+) create mode 100644 src/micron/nmc.h create mode 100644 src/micron/nmc.hpp create mode 100644 src/micron/nmc_errno.h create mode 100644 src/micron/nmc_sync.h create mode 100644 src/micron/nmc_sync.hpp create mode 100644 src/micron/nmc_te.hpp create mode 100644 src/micron/nmc_te_intrin.h create mode 100644 src/micron/nmc_types.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a253119..532175f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,8 @@ set(SCDAQ_INCL_DIRS ${SCDAQ_SOURCE_DIR}/json ${SCDAQ_SOURCE_DIR}/wzdma) +#set(MICRON_INCL_DIRS /opt/micron/include) + set(SCDAQ_CMAKE_MODS_DIR ${CMAKE_SOURCE_DIR}/cmake) set(CMAKE_CXX_STANDARD 17) diff --git a/src/micron/nmc.h b/src/micron/nmc.h new file mode 100644 index 00000000..d29ac595 --- /dev/null +++ b/src/micron/nmc.h @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2024 Micron Technology, Inc. + * + * This file is the confidential and proprietary property of + * Micron Technology, Inc. 
+ */ + +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <stddef.h> + +#include "nmc_errno.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +#pragma warning(disable:4099) // Ignore warning where type is declared as class in one place and struct in another +#endif + +#include "nmc_types.h" // pickup common types +#include "nmc_sync.h" + +extern nmc_remote_host_t nmc_attach_remote_host(char *hostname); +extern void nmc_detach_remote_host(nmc_remote_host_t host); + +extern nmc_attach_props_t nmc_attach_props_construct(void); +extern void nmc_attach_props_destruct(nmc_attach_props_t props); + +extern nmc_remote_host_t nmc_ap_get_remote_host(nmc_attach_props_t props); +extern void nmc_ap_set_remote_host(nmc_attach_props_t props, nmc_remote_host_t); + +extern uint32_t nmc_ap_get_device_id(nmc_attach_props_t props); +extern void nmc_ap_set_device_id(nmc_attach_props_t props, uint32_t id); + +uint32_t nmc_ap_get_se_count(nmc_attach_props_t props); +void nmc_ap_set_se_count(nmc_attach_props_t props, uint32_t count); + +uint32_t nmc_ap_get_te_count(nmc_attach_props_t props); +void nmc_ap_set_te_count(nmc_attach_props_t props, uint32_t count); + +extern uint32_t nmc_ap_get_command_queue_count(nmc_attach_props_t props); +extern void nmc_ap_set_command_queue_count(nmc_attach_props_t props, uint32_t count); + +extern uint32_t nmc_ap_get_te_total_thread_count(nmc_attach_props_t props); +extern void nmc_ap_set_te_total_thread_count(nmc_attach_props_t props, uint32_t count); + +extern uint32_t nmc_ap_get_te_master_stack_size(nmc_attach_props_t props); +extern void nmc_ap_set_te_master_stack_size(nmc_attach_props_t props, uint32_t size); + +extern uint32_t nmc_ap_get_te_fiber_stack_size(nmc_attach_props_t props); +extern void nmc_ap_set_te_fiber_stack_size(nmc_attach_props_t props, uint32_t size); + +extern uint32_t nmc_ap_get_te_master_thread_count(nmc_attach_props_t props); +extern void nmc_ap_set_te_master_thread_count(nmc_attach_props_t props, 
uint32_t count); + +extern nmc_stack_check_mode_t nmc_ap_get_te_stack_check_mode(nmc_attach_props_t props); +extern void nmc_ap_set_te_stack_check_mode(nmc_attach_props_t props, EStackCheckMode mode); + +size_t nmc_ap_get_memory_bytes(nmc_attach_props_t props); +void nmc_ap_set_memory_bytes(nmc_attach_props_t props, size_t size); + +/* + * Resource management + */ + +#define NMC_ATTACH_PROPS_DEFAULT ((nmc_attach_props_t)0) + +extern nmc_t nmc_construct(nmc_attach_props_t props, nmc_status_t *status); +extern void nmc_destruct(nmc_t nmc); + + +/* + * Query routines + */ +extern double nmc_attached_time_ns(nmc_t nmc); + + +/* + * Memory management + */ + +extern void *nmc_malloc(nmc_t nmc, size_t size); +extern void *nmc_calloc(nmc_t nmc, size_t nmemb, size_t size); +extern void *nmc_realloc(nmc_t nmc, void *ptr, size_t size); +extern void *nmc_aligned_alloc(nmc_t nmc, size_t alignment, size_t size); +extern void nmc_free(nmc_t nmc, void *ptr); +extern void *nmc_mmap(nmc_t nmc, void *addr, size_t length, int prot, int flags); +extern int nmc_munmap(nmc_t nmc, void *addr, size_t length); +extern void nmc_alloc_break(size_t seqid); + +/* +* Host User Command Properties +*/ +extern nmc_cmd_props_t nmc_cmd_props_construct(uint16_t cmd_id); +extern void nmc_cmd_props_destruct(nmc_cmd_props_t cmd_props); +extern void nmc_cmd_props_set_cmd_id(nmc_cmd_props_t cmd_props, uint16_t cmd_id); +extern void nmc_cmd_props_set_huq_id(nmc_cmd_props_t cmd_props, uint8_t huq_id); +extern void nmc_cmd_props_set_cmd_atomic(nmc_cmd_props_t cmd_props, bool atomic); +extern void nmc_cmd_props_set_arg1_fp(nmc_cmd_props_t cmd_props, bool arg_fp); +extern void nmc_cmd_props_set_arg2_fp(nmc_cmd_props_t cmd_props, bool arg_fp); +extern void nmc_cmd_props_set_arg3_fp(nmc_cmd_props_t cmd_props, bool arg_fp); +extern void nmc_cmd_props_set_arg4_fp(nmc_cmd_props_t cmd_props, bool arg_fp); + +/* + * Memory Operations + */ +extern nmc_status_t nmc_mem_load(nmc_t nmc, nmc_cmd_props_t cmd_props, + void 
*pAddr, size_t size); +extern nmc_status_t nmc_mem_store(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); + +extern nmc_status_t nmc_mem_copy(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDst, + void *pSrc, size_t size); + +extern nmc_status_t nmc_mem_set(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDst, + uint64_t elemData, size_t elemSize, size_t elemCnt); + +extern nmc_status_t nmc_mem_gather_stride(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDst, + void *pSrcBase, size_t elemSize, size_t elemStride, size_t elemCnt); + +extern nmc_status_t nmc_mem_gather_address(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDst, + void *pSrcAddr, size_t elemSize, size_t elemCnt); + +extern nmc_status_t nmc_mem_gather_offset(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pDst, void *pSrcOffset, void *pSrcBase, size_t elemSize, + size_t elemCnt); + +extern nmc_status_t nmc_mem_scatter_stride(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDstBase, + void *pSrc, size_t elemSize, size_t elemStride, size_t elemCnt); + +extern nmc_status_t nmc_mem_scatter_address(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDstAddr, + void *pSrc, size_t elemSize, size_t elemCnt); + +extern nmc_status_t nmc_mem_scatter_offset(nmc_t nmc, nmc_cmd_props_t cmd_props, void *pDstBase, + void *DstOffset, void *pSrc, size_t elemSize, size_t elemCnt); + + +/* + * Image and symbol management + */ + +extern nmc_status_t nmc_te_load(const char *pathname); +extern void *nmc_te_lookup(const char *symname); + +extern nmc_status_t nmc_se_load(nmc_t nmc, const char *pathname); +extern void *nmc_se_lookup(nmc_t nmc, const char *symname); + +/* +* Command response +*/ +extern nmc_response_t nmc_response_construct(); +extern void nmc_response_destruct(nmc_response_t response); + +extern nmc_status_t nmc_get_response(nmc_t nmc, nmc_response_t response); +extern nmc_status_t nmc_peek_response(nmc_t nmc, nmc_response_t response); +extern nmc_status_t nmc_pop_response(nmc_t nmc); +extern nmc_cmd_t 
nmc_response_get_cmd(nmc_response_t response); +extern nmc_cid_t nmc_response_get_cmd_id(nmc_response_t response); +extern nmc_status_t nmc_response_get_status(nmc_response_t response); + +extern void nmc_response_join(nmc_response_t response, nmc_cid_t *cid, uint64_t *arg1, uint64_t *arg2); +extern void nmc_response_atomic(nmc_response_t response, nmc_cid_t *cid, uint64_t *arg); +extern void nmc_response_mem_load(nmc_response_t response, nmc_cid_t *cid, uint64_t *data); +extern void nmc_response_event_receive(nmc_response_t response, nmc_cid_t *cid, uint64_t *ev_data); +extern void nmc_response_event_destination(nmc_response_t response, nmc_cid_t *cid, uint64_t *ev_dest); + + +/* + * Thread creation + */ + +extern nmc_status_t nmc_thread_create(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *entry, void *target, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4); + + +/* + * Events + */ + +extern nmc_event_t nmc_event_allocate(nmc_t nmc, bool bData); +extern nmc_status_t nmc_event_free(nmc_t nmc, nmc_event_t event); + +extern nmc_status_t nmc_event_destination(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum); +extern nmc_status_t nmc_event_simple_mode(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum); +extern nmc_status_t nmc_event_broadcast_mode(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum, uint16_t evChan); +extern nmc_status_t nmc_event_collect_simple_mode(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum, uint16_t evCnt); +extern nmc_status_t nmc_event_collect_reduce_mode(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_reduce_op_type_t evOpType, nmc_event_reduce_op_size_t evOpSize, + nmc_event_t evNum, uint16_t evCnt, uint64_t data); +extern nmc_status_t nmc_event_collect_cascade_mode(nmc_t nmc, nmc_cmd_props_t cmd_props, + uint32_t deviceId, uint32_t cascadeQueueId, nmc_event_t evNum, uint16_t evCnt); +extern nmc_status_t nmc_event_send(nmc_t nmc, nmc_cmd_props_t cmd_props, + uint64_t evDest); +extern 
nmc_status_t nmc_event_send_data(nmc_t nmc, nmc_cmd_props_t cmd_props, + uint64_t evDest, uint64_t data); +extern nmc_status_t nmc_event_broadcast(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum, uint16_t evChan); +extern nmc_status_t nmc_event_broadcast_data(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum, uint16_t evChan, uint64_t data); +extern nmc_status_t nmc_event_receive(nmc_t nmc, nmc_cmd_props_t cmd_props, + nmc_event_t evNum); + +/* + * Atomic operations + */ + +extern nmc_status_t nmc_atomic_add(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_xor(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_and(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_or(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_min(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_minu(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_max(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_maxu(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, uint64_t data, size_t size); +extern nmc_status_t nmc_atomic_fadd32(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, float data); +extern nmc_status_t nmc_atomic_fmin32(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, float data); +extern nmc_status_t nmc_atomic_fmax32(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, float data); +extern nmc_status_t nmc_atomic_fadd64(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, double data); +extern nmc_status_t nmc_atomic_fmin64(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, double data); +extern nmc_status_t 
nmc_atomic_fmax64(nmc_t nmc, nmc_cmd_props_t cmd_props, + void *pAddr, double data); + +#ifdef __cplusplus +} +#endif diff --git a/src/micron/nmc.hpp b/src/micron/nmc.hpp new file mode 100644 index 00000000..b6e5c99f --- /dev/null +++ b/src/micron/nmc.hpp @@ -0,0 +1,1023 @@ +// +// Copyright (C) 2024 Micron Technology, Inc. +// +// This file is the confidential and proprietary property of +// Micron Technology, Inc. +// + +#pragma once + +#include <cstring> +#include <type_traits> +#include "nmc.h" +#include "nmc_sync.hpp" + +class NmcAttachProps { + friend class Nmc; +public: + /// @brief Construct an empty attach properties object + /// + /// Used to specify hardware resources in a Near Memory Compute Device + /// attach domain. The object is initialized to the device defaults and the + /// programmer can update only the desired properties. + /// + /// The attach property object is also used to query the Near Memory Compute Device + /// properties after a successful attach. During attach, these fields are updated + /// to match what was allocated. 
+ /// + /// @see Example: + /// To query default Transformation Engine thread count + /// @code + /// NmcAttachProps nmcProps; + /// nmc_status_t status = NMC_NO_MEM; + /// Nmc *pNmc = new Nmc(nmcProps, &status); + /// + /// uint32_t teThreadCount = nmcProps.getTeTotalThreadCount(); + /// @endcode + NmcAttachProps() { + m_opaque = nmc_attach_props_construct(); + } + + /// @brief Destroy an attach properties object + ~NmcAttachProps() { + nmc_attach_props_destruct(m_opaque); + } + + /// @brief Return device ID + /// + /// @return Device ID + uint32_t getDeviceId() { + return nmc_ap_get_device_id(m_opaque); + } + + /// @brief Set device ID + /// + /// @param[in] id ID of device to request + void setDeviceId(uint32_t id) { + nmc_ap_set_device_id(m_opaque, id); + } + + /// @brief Return number of SE compute elements + /// + /// @return Count of SE elements + uint32_t getSeCount() { + return nmc_ap_get_se_count(m_opaque); + } + + /// @brief Set number of SE compute elements + /// + /// @param[in] seCnt Number of SE compute elements + void setSeCount(uint32_t seCnt) { + nmc_ap_set_se_count(m_opaque, seCnt); + } + + /// @brief Return number of TE compute elements + /// + /// @return Count of TE elements + uint32_t getTeCount() { + return nmc_ap_get_te_count(m_opaque); + } + + /// @brief Set number of TE compute elements + /// + /// @param[in] teCnt Number of TE compute elements + void setTeCount(uint32_t teCnt) { + nmc_ap_set_te_count(m_opaque, teCnt); + } + + /// @brief Return number of TE threads (masters and fibers) + /// + /// @return Count of total threads + uint32_t getTeTotalThreadCount() { + return nmc_ap_get_te_total_thread_count(m_opaque); + } + + /// @brief Set number of TE threads (masters and fibers) + /// + /// @param[in] threadCnt Number of total threads + void setTeTotalThreadCount(uint32_t threadCnt) { + nmc_ap_set_te_total_thread_count(m_opaque, threadCnt); + } + + /// @brief Return count of command queues in attach parameters + /// + /// @return Command queue count + uint32_t getCommandQueueCount() { + return 
nmc_ap_get_command_queue_count(m_opaque); + } + + /// @brief Set number of command queues to request for an Nmc::attach() + /// + /// @param[in] commandQueueCount Command queue count + void setCommandQueueCount(uint32_t commandQueueCount) { + nmc_ap_set_command_queue_count(m_opaque, commandQueueCount); + } + + /// @brief Return size of stack for TE master threads + /// + /// @return Stack size in bytes + uint32_t getTeMasterStackSize() { + return nmc_ap_get_te_master_stack_size(m_opaque); + } + + /// @brief Set size of TE master thread stack for an Nmc::attach() + /// + /// @param[in] teMasterStackSize Size in bytes + void setTeMasterStackSize(uint32_t teMasterStackSize) { + nmc_ap_set_te_master_stack_size(m_opaque, teMasterStackSize); + } + + /// @brief Return size of stack for TE fibers + /// + /// @return Stack size in bytes + uint32_t getTeFiberStackSize() { + return nmc_ap_get_te_fiber_stack_size(m_opaque); + } + + /// @brief Set size of TE fiber stack for an Nmc::attach() + /// + /// @param[in] teFiberStackSize Size in bytes + void setTeFiberStackSize(uint32_t teFiberStackSize) { + nmc_ap_set_te_fiber_stack_size(m_opaque, teFiberStackSize); + } + + /// @brief Return maximum number of TE master threads + /// + /// @return Count of available master threads + uint32_t getTeMasterThreadCount() { + return nmc_ap_get_te_master_thread_count(m_opaque); + } + + /// @brief Set number of master TE threads to request for an Nmc::attach() + /// + /// @param[in] teMasterThreadCount Number of threads + void setTeMasterThreadCount(uint32_t teMasterThreadCount) { + nmc_ap_set_te_master_thread_count(m_opaque, teMasterThreadCount); + } + + /// @brief Return checking mode for TE stack access + /// + /// @return Stack access mode + EStackCheckMode getTeStackCheckMode() { + return nmc_ap_get_te_stack_check_mode(m_opaque); + } + + /// @brief Set access mode for TE stacks + /// + /// @param[in] mode Stack access mode + void setTeStackCheckMode(EStackCheckMode mode) { + 
nmc_ap_set_te_stack_check_mode(m_opaque, mode); + } + + /// @brief Return reserved memory amount + /// + /// @return Amount of reserved memory in bytes + size_t getMemoryBytes() { + return nmc_ap_get_memory_bytes(m_opaque); + } + + /// @brief Set amount of memory to reserve + /// + /// @param[in] mem Amount of memory to reserve in bytes + void setMemoryBytes(size_t size) { + nmc_ap_set_memory_bytes(m_opaque, size); + } + +private: + nmc_attach_props_t m_opaque; +}; + +class NmcResponse { + friend class Nmc; +public: + NmcResponse() { + m_opaque = nmc_response_construct(); + } + ~NmcResponse() { + nmc_response_destruct(m_opaque); + } + + ENmcCmd getCmd() { + return nmc_response_get_cmd(m_opaque); + } + + nmc_cid_t getCmdId() { + return nmc_response_get_cmd_id(m_opaque); + } + + nmc_status_t getStatus() { + return nmc_response_get_status(m_opaque); + } + + template<typename T1 = uint64_t, typename T2 = uint64_t> + void threadJoin(nmc_cid_t* pCmdId, T1* _arg1 = 0, T2* _arg2 = 0) { + uint64_t arg1, arg2; + nmc_response_join(m_opaque, pCmdId, &arg1, &arg2); + if (_arg1) memcpy(_arg1, &arg1, sizeof(T1)); + if (_arg2) memcpy(_arg2, &arg2, sizeof(T2)); + } + + template<typename T> + void atomic(nmc_cid_t* pCmdId, T* _arg) { + uint64_t arg; + nmc_response_atomic(m_opaque, pCmdId, &arg); + memcpy(_arg, &arg, sizeof(T)); + } + + template<typename T> + void memLoad(nmc_cid_t* pCmdId, T* pData) { + uint64_t data; + nmc_response_mem_load(m_opaque, pCmdId, &data); + memcpy(pData, &data, sizeof(T)); + } + + void eventReceive(nmc_cid_t* pCmdId, uint64_t* pEvData) { + nmc_response_event_receive(m_opaque, pCmdId, pEvData); + } + + void eventDestination(nmc_cid_t* pCmdId, uint64_t* pEvDest) { + nmc_response_event_destination(m_opaque, pCmdId, pEvDest); + } + +private: + nmc_response_t m_opaque; +}; + +#if 0 +/// +/// Near Memory Compute Mutex +/// +class NmcMutex { + NmcMutex(); + ~NmcMutex(); + + int trylock(); + int lock(); + int unlock(); +}; + + +class NmcBarrier { + friend class 
Nmc; + +private: + NmcBarrier(Nmc& nmc) : m_opaque(nmc.m_opaque) { + m_barrier = nmc_barrier_create(m_opaque); + } + + nmc_t m_opaque; + nmc_barrier_t m_barrier; + +public: + ~NmcBarrier() { + nmc_barrier_destroy(m_opaque, m_barrier); + } + nmc_status_t add(uint32_t threads) { + return nmc_barrier_add(m_opaque, m_barrier, threads); + } +}; + +#endif + +class NmcCmdProps { + friend class Nmc; +public: + /// @brief Construct an empty command properties object + /// + /// NmcCmdProps is used to customize commands on the NMC + /// It is currently used for: + /// - specifying the CmdId for matching commands on the response side + /// - Configuring Atomic and Fencing operations + /// - transferring type information into type commands + /// + NmcCmdProps(uint16_t cmdId = 0) { + m_opaque = nmc_cmd_props_construct(cmdId); + } + + /// @brief Destroy a command properties object + ~NmcCmdProps() { + nmc_cmd_props_destruct(m_opaque); + } + + void setCmdId(uint16_t cmdId) { + nmc_cmd_props_set_cmd_id(m_opaque, cmdId); + } + void setHuqId(uint8_t huqId) { + nmc_cmd_props_set_huq_id(m_opaque, huqId); + } + void setCmdAtomic(bool bAtomic) { + nmc_cmd_props_set_cmd_atomic(m_opaque, bAtomic); + } + + /// @brief Set argument 1 as a floating-point value + /// + /// @param[in] argFp True if argument 1 is a floating-point value + void setArg1Fp(bool argFp) { + nmc_cmd_props_set_arg1_fp(m_opaque, argFp); + } + + /// @brief Set argument 2 as a floating-point value + /// + /// @param[in] argFp True if argument 2 is a floating-point value + void setArg2Fp(bool argFp) { + nmc_cmd_props_set_arg2_fp(m_opaque, argFp); + } + + /// @brief Set argument 3 as a floating-point value + /// + /// @param[in] argFp True if argument 3 is a floating-point value + void setArg3Fp(bool argFp) { + nmc_cmd_props_set_arg3_fp(m_opaque, argFp); + } + + /// @brief Set argument 4 as a floating-point value + /// + /// @param[in] argFp True if argument 4 is a floating-point value + void setArg4Fp(bool argFp) { + nmc_cmd_props_set_arg4_fp(m_opaque, argFp); + } + +private: + 
nmc_cmd_props_t m_opaque; +}; + +class NmcCmdId : public NmcCmdProps { +public: + /// @brief Construct a command id object + /// + /// NmcCmdId is an NmcCmdProps object that only sets the cmdId + /// + NmcCmdId(uint16_t cmdId) { + setCmdId(cmdId); + } +}; + +class NmcHuqId : public NmcCmdProps { +public: + /// @brief Construct a HUQ id object + /// + /// NmcHuqId is an NmcCmdProps object that only sets the huqId + /// + NmcHuqId(uint8_t huqId) { + setHuqId(huqId); + } +}; + +class Nmc { + +public: + /// @brief Constructs an instance of the Nmc class. + /// + /// Upon creation, will attempt to reserve the Near Memory Compute + /// resources specified in attachProps (or a default set of resources if + /// attachProps is omitted). The returned allocation will include + /// memory and compute elements from a single NMC device even if + /// multiple devices are present. + /// + /// @param[out] pStatus Returns zero on success, or a non-zero error code + /// otherwise. Callers should verify success prior to invoking any + /// other methods of the class (except ~Nmc(), which is always permitted). + /// + /// @param[in] attachProps Optional argument specifying the requested resources + /// (see class NmcAttachProps). If omitted, a default allocation + /// consisting of an entire NMC device will be requested. + Nmc(nmc_status_t *pStatus = nullptr) : Nmc(NmcAttachProps{}, pStatus) {} + + Nmc(NmcAttachProps const &attachProps, nmc_status_t *pStatus = nullptr) { + m_opaque = nmc_construct(attachProps.m_opaque, pStatus); + } + + /// @brief Release Near Memory Compute resources. + /// + /// Releases any reserved Near Memory Compute resources and memory. Any + /// threads executing within the NMC attach domain will be terminated. + /// + ~Nmc() { + nmc_destruct(m_opaque); + } + + /// @brief Return time since attach. + /// + /// @return Time in nanoseconds since successful allocation of a Near + /// Memory Compute attach domain. 
+ double getAttachedTimeNs() { + return nmc_attached_time_ns(m_opaque); + } + + /// @brief Allocates memory on a device within the NMC attach domain + /// + /// @param[in] size Size in bytes of the memory to be allocated + /// + /// @return pointer to the newly allocated memory or nullptr on failure + void* malloc(size_t size) { + return nmc_malloc(m_opaque, size); + } + + /// @brief Allocates memory on a device within the NMC attach domain + /// + /// Allocates a block of memory for an array of nmemb elements, each of them size bytes long, + /// and initializes all its bits to zero. + /// + /// @param[in] nmemb number of elements to allocate. + /// @param[in] size size of each element in bytes + /// @return pointer to the newly allocated memory or nullptr on failure + void* calloc(size_t nmemb, size_t size) { + return nmc_calloc(m_opaque, nmemb, size); + } + + /// + /// @brief resize a memory allocation + /// + /// Resizes a memory allocation pointed to by \p ptr to \p size bytes. + /// If \p ptr is nullptr, this function behaves like malloc. + /// If \p ptr is not nullptr, the returned allocation will contain the data in + /// the original allocation up to the lesser of the original allocation size and \p size. + /// If \p size is larger than the original allocation, the excess bytes are uninitialized. 
+ /// + /// @param[in] ptr pointer to the memory to be reallocated + /// @param[in] size size in bytes of the new allocation + /// @return pointer to newly reallocated memory or nullptr on failure + void* realloc(void* ptr, size_t size) { + return nmc_realloc(m_opaque, ptr, size); + } + + /// + /// @brief allocate an aligned memory block + /// + /// Allocates \p size bytes of memory that are aligned to at least \p alignment + /// + /// @param[in] alignment the alignment granularity + /// @param[in] size the number of bytes to allocate + /// @return a pointer to the new allocation or nullptr if the memory cannot be allocated + void* alignedAlloc(size_t alignment, size_t size) { + return nmc_aligned_alloc(m_opaque, alignment, size); + } + + /// @brief free allocated memory + /// + /// Frees the memory pointed to by \p ptr, which must have been returned + /// by a previous call of Nmc::malloc(), Nmc::calloc(), or + /// Nmc::alignedAlloc(). Otherwise, or if Nmc::free(ptr) has already been + /// called, undefined behavior occurs. If \p ptr is a nullptr, no operation + /// is performed. + /// + /// @param[in] ptr Address of memory to free + void free(void* ptr) { + nmc_free(m_opaque, ptr); + } + + /// @brief map NMC device memory into address space + /// + /// Creates a new mapping in the virtual address space of the calling + /// process. The starting address for the new mapping is specified in + /// \p addr. The \p length argument specifies the length of the mapping. + /// + /// The \p prot argument is as defined for mmap(2), with the exception + /// that PROT_EXEC will be ignored. + /// + /// The \p flags argument is as defined for mmap(2), with the exception + /// that MAP_PRIVATE and MAP_ANONYMOUS are assumed for all mappings. 
+ /// + /// @param[in] addr Starting address for mapping; must be a multiple of the page size + /// @param[in] length Length for the mapping + /// @param[in] prot Memory protection mode; see mmap(2) + /// @param[in] flags Access flags; see mmap(2) + /// @return Pointer to mapped area or MAP_FAILED; errno set on failure + void* mmap(void* addr, size_t length, int prot, int flags) { + return nmc_mmap(m_opaque, addr, length, prot, flags); + } + + /// @brief delete mappings for specified address range + /// + /// Deletes the mappings for a specified address range and causes any further references + /// to addresses within the range to generate invalid memory references. + /// + /// @param[in] addr Starting address of mapping; must have been returned from a prior call to Nmc::mmap() + /// @param[in] length Length of the mapping + /// @return Zero on success; returns -1 and sets errno on failure + int munmap(void* addr, size_t length) { + return nmc_munmap(m_opaque, addr, length); + } + + /// + /// @brief break on allocated memory block + /// + /// Identifies a memory block to break on when allocated to identify memory leak source + /// + /// @param[in] seqId memory block identifier obtained from previous run of application + static void allocBreak(size_t seqId) { + nmc_alloc_break(seqId); + } + + /// @brief Issue load request + /// + /// @tparam T Size of data to load + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pAddr Address to load from + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + template <typename T> + nmc_status_t memLoad(NmcCmdProps const &cmdProps, T* pAddr) { + return nmc_mem_load(m_opaque, cmdProps.m_opaque, pAddr, sizeof(T)); + } + + /// @brief Pack argument into 64 bit payload + /// + /// Helper function used in threadCreate for taking an input argument + /// and packing it into a 64 bit payload to send to the device. 
+ /// Only works for arithmetic and pointer types. + /// + template<typename T> + uint64_t extendArg(T arg) + { + static_assert((std::is_arithmetic<T>::value || std::is_pointer<T>::value) && + sizeof(T) <= sizeof(uint64_t), "extendArg expects numeric/pointer inputs 64 bits or less"); + if (std::is_floating_point<T>::value) { + uint64_t rtn = 0; + memcpy(&rtn, &arg, sizeof(T)); + return rtn; + } else + return std::is_signed<T>::value ? (int64_t)arg : (uint64_t)arg; + } + + /// @brief Store data in NMC memory + /// + /// @tparam T Size of data to store + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pAddr Address to store to + /// @param[in] data Value to store + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + template <typename T> + nmc_status_t memStore(NmcCmdProps const& cmdProps, T* pAddr, T _data) { + uint64_t data = extendArg(_data); + return nmc_mem_store(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + + /// @brief Copy data + /// + /// Copies data between host memory and NMC memory. + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Destination address; may be either host or NMC memory. + /// @param[in] pSrc Source address; may be either host or NMC memory. 
+ /// @param[in] size in bytes (1, 2, 4, or 8) + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memCopy(NmcCmdProps const &cmdProps, void* pDst, void* pSrc, size_t size) { + return nmc_mem_copy(m_opaque, cmdProps.m_opaque, pDst, pSrc, size); + } + + /// @brief Set NMC memory + /// + /// @tparam T Element size to operate on + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Start address of memory region to operate on + /// @param[in] elemData Value to store + /// @param[in] elemCnt Number of elements of size T + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + template <typename T> + nmc_status_t memSet(NmcCmdProps const &cmdProps, void* pDst, + T elemData, size_t elemSize, size_t elemCnt) { + return nmc_mem_set(m_opaque, cmdProps.m_opaque, pDst, elemData, sizeof(T), elemCnt); + } + + /// @brief Perform stride-based gather operation + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Destination address + /// @param[in] pSrcBase Base source address; assumed to be NMC memory + /// @param[in] elemSize Size of each element in bytes + /// @param[in] elemStride Distance between consecutive elements + /// @param[in] elemCnt Number of elements to gather + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memGather(NmcCmdProps const &cmdProps, void* pDst, void* pSrcBase, + size_t elemSize, size_t elemStride, size_t elemCnt) { + return nmc_mem_gather_stride(m_opaque, cmdProps.m_opaque, pDst, pSrcBase, elemSize, elemStride, elemCnt); + } + + /// @brief Perform address-based gather operation + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Destination address + /// @param[in] pSrcBase Base source address; assumed to be NMC memory + /// @param[in] 
elemSize Size of each element in bytes + /// @param[in] elemCnt Number of elements to gather + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memGather(NmcCmdProps const &cmdProps, void* pDst, void* pSrcAddr, + size_t elemSize, size_t elemCnt) { + return nmc_mem_gather_address(m_opaque, cmdProps.m_opaque, pDst, pSrcAddr, elemSize, elemCnt); + } + + /// @brief Perform an offset-based gather operation + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Destination address + /// @param[in] pSrcBase Base source address; assumed to be NMC memory + /// @param[in] pSrcOffset Source offsets. NOTE(review): nmc.h declares nmc_mem_gather_offset(nmc, cmd_props, pDst, pSrcOffset, pSrcBase, ...) but the call below passes pSrcBase before pSrcOffset - confirm the expected argument order against the vendor library + /// @param[in] elemSize Size of each element in bytes + /// @param[in] elemCnt Number of elements to gather + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memGather(NmcCmdProps const &cmdProps, void* pDst, void* pSrcBase, + void* pSrcOffset, size_t elemSize, size_t elemCnt) { + return nmc_mem_gather_offset(m_opaque, cmdProps.m_opaque, pDst, pSrcBase, pSrcOffset, elemSize, elemCnt); + } + + /// @brief Perform stride-based scatter operation + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDstBase Base destination address + /// @param[in] pSrc Source address + /// @param[in] elemSize Size of each element in bytes + /// @param[in] elemStride Distance between consecutive elements + /// @param[in] elemCnt Number of elements to scatter + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memScatter(NmcCmdProps const &cmdProps, void* pDstBase, void* pSrc, + size_t elemSize, size_t elemStride, size_t elemCnt) { + return nmc_mem_scatter_stride(m_opaque, cmdProps.m_opaque, pDstBase, pSrc, elemSize, elemStride, elemCnt); + } + + /// @brief Perform address-based scatter operation + /// + /// @param[in] cmdProps 
Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDst Destination address + /// @param[in] pSrcBase Base source address; assumed to be NMC memory + /// @param[in] elemSize Size of each element in bytes + /// @param[in] elemCnt Number of elements to gather + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memScatter(NmcCmdProps const &cmdProps, void* pDstAddr, void* pSrc, + size_t elemSize, size_t elemCnt) { + return nmc_mem_scatter_address(m_opaque, cmdProps.m_opaque, pDstAddr, pSrc, elemSize, elemCnt); + } + + /// @brief Perform an offset-based scatter operation + /// + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pDstBase + /// @param[in] DstOffset + /// @param[in] pSrc Source address + /// @param[in] elemSize Size of each element in bytes + /// @param[in] elemCnt Number of elements to scatter + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t memScatter(NmcCmdProps const &cmdProps, void* pDstBase, void* pDstOffset, + void* pSrc, size_t elemSize, size_t elemCnt) { + return nmc_mem_scatter_offset(m_opaque, cmdProps.m_opaque, pDstBase, pDstOffset, pSrc, elemSize, elemCnt); + } + + /// @brief Load an image into all attached TEs. + /// + /// If you are allocating TEs, you must call teLoad before allocating + /// a Nmc object. Example: + /// + /// status = Nmc::teLoad("foobar.r5"); + /// + /// @param[in] pathname Absolute or relative path to image file + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + static nmc_status_t teLoad(const char* pathname) { + return nmc_te_load(pathname); + } + + /// @brief Look up the address of a TE entry point + /// + /// @param[in] symname Name of the symbol to look up in the image + /// + /// @return A nullptr on failure, or the address of the entry point for the named symbol. 
+ static void* teLookup(const char* symname) { + return nmc_te_lookup(symname); + } + + /// @brief Load an image into all attached SEs + /// + /// @param[in] pathname Absolute or relative path to image file + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t seLoad(const char* pathname) { + return nmc_se_load(m_opaque, pathname); + } + + /// @brief Look up the address of an SE entry point + /// + /// @param[in] symname Symbolic name of entry + /// + /// @return A nullptr on failure, or the address of the entry point for the named symbol. + void* seLookup(const char* symname) { + return nmc_se_lookup(m_opaque, symname); + } + + /// @brief Create a thread on a NMC processing element + /// + /// Creates a new thread of execution on an NMC device in the current attach domain. Up to four + /// arguments of varying sizes may be passed. The thread will be created on the attached NMC device + /// which hosts the memory given by pTarget. + /// + /// @tparam T1 Size of _arg1 (8, 16, 32, or 64-bit integer; signed or unsigned) + /// @tparam T2 Size of _arg2 (8, 16, 32, or 64-bit integer; signed or unsigned) + /// @tparam T3 Size of _arg3 (8, 16, 32, or 64-bit integer; signed or unsigned) + /// @tparam T4 Size of _arg4 (8, 16, 32, or 64-bit integer; signed or unsigned) + /// @param[in] cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param[in] pFunc Entry point for new thread + /// @param[in] pTarget Address of data to operate on + /// @param[in] _arg1 Optional argument to pFunc + /// @param[in] _arg2 Optional argument to pFunc + /// @param[in] _arg3 Optional argument to pFunc + /// @param[in] _arg4 Optional argument to pFunc + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + template<typename T1 = uint64_t, + typename T2 = uint64_t, + typename T3 = uint64_t, + typename T4 = uint64_t> + nmc_status_t threadCreate(NmcCmdProps& cmdProps, + void *pFunc, void *pTarget, + T1
_arg1 = 0, T2 _arg2 = 0, T3 _arg3 = 0, T4 _arg4 = 0) + { + cmdProps.setArg1Fp(std::is_floating_point<T1>::value); + cmdProps.setArg2Fp(std::is_floating_point<T2>::value); + cmdProps.setArg3Fp(std::is_floating_point<T3>::value); + cmdProps.setArg4Fp(std::is_floating_point<T4>::value); + uint64_t arg1 = extendArg(_arg1); + uint64_t arg2 = extendArg(_arg2); + uint64_t arg3 = extendArg(_arg3); + uint64_t arg4 = extendArg(_arg4); + return nmc_thread_create(m_opaque, cmdProps.m_opaque, pFunc, pTarget, + arg1, arg2, arg3, arg4); + } + + /// + /// @brief Overload threadCreate for Rvalues + /// + /// This overload enables calling threadCreate with a temporary NmcCmdProps object + /// (e.g nmc.threadCreate(NmcCmdId(1)... ). This is needed because the thread create + /// call will update the command properties to mark which arguments are floating point, + /// so the base function can no longer be const &. + /// + template <typename T1 = uint64_t, + typename T2 = uint64_t, + typename T3 = uint64_t, + typename T4 = uint64_t> + nmc_status_t threadCreate(NmcCmdProps&& cmdProps, + void* pFunc, void* pTarget, + T1 _arg1 = 0, T2 _arg2 = 0, T3 _arg3 = 0, T4 _arg4 = 0) { + return threadCreate(cmdProps, pFunc, pTarget, _arg1, _arg2, _arg3, _arg4); + } + + /// @brief Read one completion response from the response queue + /// + /// @param pResp pointer to the NmcResponse object to receive into + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t recvResponse(NmcResponse* pResp) { + return nmc_get_response(m_opaque, pResp->m_opaque); + } + + /// @brief Peek at next completion response from the response queue + /// + /// @param pResp pointer to the NmcResponse object to receive into + /// + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t peekResponse(NmcResponse* pResp) { + return nmc_peek_response(m_opaque, pResp->m_opaque); + } + + /// @brief Pop one completion response from the response queue + /// + /// 
@return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t popResponse() { + return nmc_pop_response(m_opaque); + } + + // Events + + /// + /// @brief Allocate a new event + /// + /// Reserves an event within the nmc attach domain + /// + /// @param bData Whether the event includes data, which must be a uint64_t + /// @return an nmc_event_t, regarded as an opaque event container. + /// Use isEventValid to ensure that a valid event was allocated + nmc_event_t eventAllocate(bool bData) { + return nmc_event_allocate(m_opaque, bData); + } + + /// + /// @brief Free an event + /// + /// Releases the event reservation to the nmc attach domain + /// + /// @param event the event to free + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t eventFree(nmc_event_t event) { + return nmc_event_free(m_opaque, event); + } + + /// + /// @brief Send a request for the event destination + /// + /// Events are delivered to specific channels identified by destination address in + /// the attach domain. This function is used to request the destination address. 
+ /// The address can be retrieved from the response using NmcResponse::eventDestination() + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the nmc_event_t opaque pointer + /// @return NMC_SUCCESS or error number indicating reason for failure + nmc_status_t eventDestination(NmcCmdProps const &cmdProps, nmc_event_t evNum) { + return nmc_event_destination(m_opaque, cmdProps.m_opaque, evNum); + } + + /// + /// @brief Sets the event delivery mode to simple (1-to-1) + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the event number + /// @return nmc_status_t + nmc_status_t eventSimpleMode(NmcCmdProps const &cmdProps, nmc_event_t evNum) { + return nmc_event_simple_mode(m_opaque, cmdProps.m_opaque, evNum); + } + + /// + /// @brief Sets the event delivery mode to broadcast (1-to-all) + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the event number + /// @param evChan The channel on which to set the mode. 
+ /// @return nmc_status_t + nmc_status_t eventBroadcastMode(NmcCmdProps const &cmdProps, nmc_event_t evNum, uint16_t evChan) { + return nmc_event_broadcast_mode(m_opaque, cmdProps.m_opaque, evNum, evChan); + } + + /// + /// @brief Sets the event delivery mode to Simple collective (waits for a specified number of events to be received, similar to a barrier) + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the event number + /// @param evCnt The number of events before trigger + /// @return nmc_status_t + nmc_status_t eventCollectSimpleMode(NmcCmdProps const &cmdProps, nmc_event_t evNum, uint16_t evCnt) { + return nmc_event_collect_simple_mode(m_opaque, cmdProps.m_opaque, evNum, evCnt); + } + + /// + /// @brief Sets the event delivery mode to Reduce collective (waits for a specified number of events to be received, similar to a barrier) + /// and perform reduction operation on message data + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param opType The reduction operation type + /// @param opSize The reduction operation size (4/8 bytes) + /// @param evNum the event number + /// @param evCnt The number of events before trigger + nmc_status_t eventCollectReduceMode(NmcCmdProps const &cmdProps, ENmcEventReduceOpType opType, ENmcEventReduceOpSize opSize, nmc_event_t evNum, uint16_t evCnt, uint64_t evData) { + return nmc_event_collect_reduce_mode(m_opaque, cmdProps.m_opaque, opType, opSize, evNum, evCnt, evData); + } + + /// + /// @brief Sets the event delivery mode to Cascade collective (waits for a specified number of events to be received, similar to a barrier) + /// and when all events have been received, send a Cascade event to a target device + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param DeviceId The target devcie ID + /// @param CascadeQueueId The queue ID on the targeted 
device + /// @param evNum the event number + /// @param evCnt The number of events before trigger + /// @return nmc_status_t + nmc_status_t eventCollectCascadeMode(NmcCmdProps const &cmdProps, uint32_t deviceId, uint32_t cascadeQueueId, nmc_event_t evNum, uint16_t evCnt) { + return nmc_event_collect_cascade_mode(m_opaque, cmdProps.m_opaque, deviceId, cascadeQueueId, evNum, evCnt); + } + + /// + /// @brief Send a message without data + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evDest the destination of the event, previously returned with NmcResponse::eventDestination + /// @return NMC_SUCCESS or enum indicating reason for failure + nmc_status_t eventSend(NmcCmdProps const &cmdProps, uint64_t evDest) { + return nmc_event_send(m_opaque, cmdProps.m_opaque, evDest); + } + + /// + /// @brief Send a message with data + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evDest the destination of the event, previously returned with NmcResponse::eventDestination + /// @param evData the data to be delivered with the event + /// @return NMC_SUCCESS or enum indicating reason for failure + nmc_status_t eventSend(NmcCmdProps const &cmdProps, uint64_t evDest, uint64_t evData) { + return nmc_event_send_data(m_opaque, cmdProps.m_opaque, evDest, evData); + } + + /// + /// @brief Broadcast a message without data + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the event number + /// @param evChan the channel on which to broadcast + /// @return NMC_SUCCESS or enum indicating reason for failure + nmc_status_t eventBroadcast(NmcCmdProps const &cmdProps, nmc_event_t evNum, uint16_t evChan) { + return nmc_event_broadcast(m_opaque, cmdProps.m_opaque, evNum, evChan); + } + + /// + /// @brief Broadcast a message with data + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + ///
@param evNum the event number + /// @param evChan the channel on which to broadcast + /// @param evData the data to be delivered with the event + /// @return NMC_SUCCESS or enum indicating reason for failure + nmc_status_t eventBroadcast(NmcCmdProps const &cmdProps, nmc_event_t evNum, uint16_t evChan, uint64_t evData) { + return nmc_event_broadcast_data(m_opaque, cmdProps.m_opaque, evNum, evChan, evData); + } + + /// + /// @brief Receive an event + /// + /// @param cmdProps Command Properties; used to specify id, atomic, and fencing operations + /// @param evNum the event number to check + /// @return NMC_SUCCESS if the event is present + nmc_status_t eventReceive(NmcCmdProps const &cmdProps, nmc_event_t evNum) { + return nmc_event_receive(m_opaque, cmdProps.m_opaque, evNum); + } + + // Atomics + template <typename T> + nmc_status_t atomicAdd(NmcCmdProps const &cmdProps, T* pAddr, T data) { + if (std::is_floating_point<T>::value) { + if (sizeof(T) == 4) + return nmc_atomic_fadd32(m_opaque, cmdProps.m_opaque, pAddr, (float)data); + else + return nmc_atomic_fadd64(m_opaque, cmdProps.m_opaque, pAddr, (double)data); + } else + return nmc_atomic_add(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + + template <typename T> + nmc_status_t atomicXor(NmcCmdProps const &cmdProps, T* pAddr, T data) { + return nmc_atomic_xor(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + + template <typename T> + nmc_status_t atomicAnd(NmcCmdProps const &cmdProps, T* pAddr, T data) { + return nmc_atomic_and(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + + template <typename T> + nmc_status_t atomicOr(NmcCmdProps const &cmdProps, T* pAddr, T data) { + return nmc_atomic_or(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + + template <typename T> + nmc_status_t atomicMin(NmcCmdProps const &cmdProps, T* pAddr, T data) { + if (std::is_floating_point<T>::value) { + if (sizeof(T) == 4) + return nmc_atomic_fmin32(m_opaque, cmdProps.m_opaque, pAddr, 
(float)data); + else + return nmc_atomic_fmin64(m_opaque, cmdProps.m_opaque, pAddr, (double)data); + } else { + if (std::is_unsigned<T>::value) + return nmc_atomic_minu(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + else + return nmc_atomic_min(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + } + + template <typename T> + nmc_status_t atomicMax(NmcCmdProps const &cmdProps, T* pAddr, T data) { + if (std::is_floating_point<T>::value) { + if (sizeof(T) == 4) + return nmc_atomic_fmax32(m_opaque, cmdProps.m_opaque, pAddr, (float)data); + else + return nmc_atomic_fmax64(m_opaque, cmdProps.m_opaque, pAddr, (double)data); + } else { + if (std::is_unsigned<T>::value) + return nmc_atomic_maxu(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + else + return nmc_atomic_max(m_opaque, cmdProps.m_opaque, pAddr, data, sizeof(T)); + } + } + + NmcSpinLock spinLockCreate(){ + return NmcSpinLock(m_opaque); + } + +#if 0 + NmcMutex* mutexCreate(); + nmc_status_t mutexDestroy(NmcMutex* mutex); + + + NmcBarrier* barrierCreate() { + return new NmcBarrier(*this); + } + +#endif + +private: + nmc_t m_opaque; +}; diff --git a/src/micron/nmc_errno.h b/src/micron/nmc_errno.h new file mode 100644 index 00000000..81382bf9 --- /dev/null +++ b/src/micron/nmc_errno.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2024 Micron Technology, Inc. + * + * This file is the confidential and proprietary property of + * Micron Technology, Inc. + */ +/* + * NMC Runtime Errno API + */ +#pragma once + +/* + * Error values that are shared across the Host and TE. 
+ */ +#define NMC_SUCCESS 0 +#define NMC_INV_NMC 128 +#define NMC_NO_CHILD 129 +#define NMC_NO_IMAGE 130 +#define NMC_RETRY 131 +#define NMC_NOT_SUPP 132 +#define NMC_INV_BARRIER 133 +#define NMC_INV_MTX 134 +#define NMC_NOTMTX 135 +#define NMC_DEADLCK 136 +#define NMC_RESP_ID 137 +#define NMC_RESP_STATUS 138 +#define NMC_INV_BARRIER_ATTR 139 +#define NMC_INV_VALUE 140 +#define NMC_INV_CMD 141 +#define NMC_INV_AMO_ADDR 142 +#define NMC_NO_MEM 143 +#define NMC_INV_APP_ENG 144 +#define NMC_INV_ARG_TYPE 145 +#define NMC_INV_RTN_ARG_CNT 146 +#define NMC_INV_DEV_ID 147 +#define NMC_INV_HUQ_ID 148 +#define NMC_NO_TENANT 149 + + +/* Host event interface error values */ +#define NMC_INV_EVMODE 150 +#define NMC_INV_EVNUM 151 +#define NMC_INV_EVCHAN 152 +#define NMC_INV_EVCNT 153 +#define NMC_RCV_TERM 154 +#define NMC_INV_EVDST 155 + +/* + * nmc_get_response can return the following for error conditions + */ +#define NMC_NO_TE 252 +#define NMC_NO_DM 253 +#define NMC_NO_SE 254 +#define NMC_NOT_HUC_CMD 255 diff --git a/src/micron/nmc_sync.h b/src/micron/nmc_sync.h new file mode 100644 index 00000000..6db9d4ff --- /dev/null +++ b/src/micron/nmc_sync.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2024 Micron Technology, Inc. + * + * This file is the confidential and proprietary property of + * Micron Technology, Inc. + */ + +#pragma once + +#include <inttypes.h> +#include <stdbool.h> +#include <stddef.h> + +#include "nmc_errno.h" +#include "nmc_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Synchronization primitives + * + * All of the synchronization data types are opaque. We only define + * a pointer of type X. + * + * spin locks: + * + * Note: The nmc_spinlock_create and nmc_spinlock_destroy are only + * available on the host. The nmc_spinlock_trylock, nmc_spinlock_lock, + * and nmc_spinlock_unlock are defined for host and RISC-V.
+ * + */ +#if !defined(__riscv) +extern nmc_status_t nmc_spinlock_create(nmc_t, nmc_lock_t *); +extern nmc_status_t nmc_spinlock_destroy(nmc_lock_t); +#endif +extern nmc_status_t nmc_spinlock_trylock(nmc_lock_t lock); +extern nmc_status_t nmc_spinlock_lock(nmc_lock_t lock); +extern nmc_status_t nmc_spinlock_unlock(nmc_lock_t lock); + +#if 0 +extern nmc_status_t nmc_mutex_create(nmc_t nmc, nmc_mutex_t *mtx); +extern nmc_status_t nmc_mutex_destroy(nmc_t nmc, nmc_mutex_t mtx); +extern nmc_status_t nmc_mutex_trylock(nmc_mutex_t mtx); +extern nmc_status_t nmc_mutex_lock(nmc_mutex_t mtx); +extern nmc_status_t nmc_mutex_unlock(nmc_mutex_t mtx); + +/* + * Barriers - + */ + +extern nmc_barrier_t nmc_barrier_create(nmc_t nmc); +extern void nmc_barrier_destroy(nmc_t nmc, nmc_barrier_t barrier); +extern nmc_status_t nmc_barrier_add(nmc_t nmc, nmc_barrier_t barrier, uint32_t threads); +#endif + + +#ifdef __cplusplus +} +#endif diff --git a/src/micron/nmc_sync.hpp b/src/micron/nmc_sync.hpp new file mode 100644 index 00000000..1c7dcc6e --- /dev/null +++ b/src/micron/nmc_sync.hpp @@ -0,0 +1,55 @@ +// +// Copyright (C) 2024 Micron Technology, Inc. +// +// This file is the confidential and proprietary property of +// Micron Technology, Inc. +// + +#pragma once + +#include "nmc_sync.h" + +/// +/// Simple spinlocks for Near Memory Compute. 
+/// +class NmcSpinLock { + friend class Nmc; + +private: + nmc_lock_t m_spinlock; + +#if !defined(__riscv) + // NmcSpinLock & ~NmcSpinLock are not supported on the NDCG device, + // so we do nothing + NmcSpinLock(nmc_t pnmc){ + (void) nmc_spinlock_create(pnmc, &m_spinlock); + } +#else + NmcSpinLock(){ + } +#endif + + +public: +#if !defined(__riscv) + ~NmcSpinLock() { + (void) nmc_spinlock_destroy(m_spinlock); + } +#else + ~NmcSpinLock() { + } +#endif + NmcSpinLock(const NmcSpinLock&) = delete; + NmcSpinLock& operator=(const NmcSpinLock&) = delete; + + int trylock() { + return nmc_spinlock_trylock(m_spinlock); + } + int lock() { + return nmc_spinlock_lock(m_spinlock); + } + int unlock() { + return nmc_spinlock_unlock(m_spinlock); + } +}; + diff --git a/src/micron/nmc_te.hpp b/src/micron/nmc_te.hpp new file mode 100644 index 00000000..f650a56f --- /dev/null +++ b/src/micron/nmc_te.hpp @@ -0,0 +1,1692 @@ +/* + * Copyright (C) 2024 Micron Technology, Inc. + * + * This file is the confidential and proprietary property of + * Micron Technology, Inc. 
+ */ +#pragma once + +#include <stdint.h> +#include <cstring> +#include <type_traits> +#include "nmc_te_intrin.h" + +enum NmcAppEngine { NmcAppEngInvalid = 0, NmcAppEngTe = 1, NmcAppEngSe = 2 }; +enum NmcAmoMode { NmcAmoRelaxMode = 0, NmcAmoAqMode = 1, NmcAmoRlMode = 2, NmcAmoAqRlMode = 3 }; + +class NmcTeCmdProps { +public: + NmcTeCmdProps(uint16_t cmdId = 0) { + m_cmdId = cmdId; + m_isArg1Fp = false; + m_isArg2Fp = false; + m_isArg3Fp = false; + m_isArg4Fp = false; + } + + uint64_t getThreadCreateCmdProps() { + return m_threadCreateCmdProps; + } + + void setCmdId(uint16_t cmdId) { + m_cmdId = cmdId; + } + uint16_t getCmdId() const { + return m_cmdId; + } + + void setCmdAtomic(bool bAtomic) { + m_bCmdAtomic = bAtomic; + } + bool isCmdAtomic() const { + return m_bCmdAtomic; + } + + void setRtnArgCnt(uint32_t args) { + m_rtnArgCnt = args; + } + uint32_t getRtnArgCnt() const { + return m_rtnArgCnt; + } + + void setAppEngine(NmcAppEngine appEng) { + m_appEng = appEng; + } + NmcAppEngine getAppEngine() const { + return m_appEng; + } + + void setEventData(uint64_t eventData) { + m_bEventData = true; + m_eventData = eventData; + } + bool hasEventData() const { + return m_bEventData; + } + uint64_t getEventData() const { + return m_eventData; + } + + void setNonBlocking(bool bNonBlocking) { + m_bNonBlocking = bNonBlocking; + } + bool isNonBlocking() const { + return m_bNonBlocking; + } + + void setAmoMode(NmcAmoMode mode) { + m_amoMode = mode; + } + NmcAmoMode getAmoMode() const { + return m_amoMode; + } + + void setNoFlush(bool bNoFlush) { + m_bNoFlush = bNoFlush; + } + bool isNoFlush() const { + return m_bNoFlush; + } + + void setNoInvalidate(bool bNoInvalidate) { + m_bNoInvalidate = bNoInvalidate; + } + bool isNoInvalidate() const { + return m_bNoInvalidate; + } + + void setBusyFail(bool bBusyFail) { + m_bBusyFail = bBusyFail; + } + bool isBusyFail() const { + return m_bBusyFail; + } + + void setNonTemporal(bool bNonTemporal) { + m_bNonTemporal = bNonTemporal; + } + 
bool isNonTemporal() const { + return m_bNonTemporal; + } + + void setSpacial(bool bSpacial) { + m_bSpacial = bSpacial; + } + bool isSpacial() const { + return m_bSpacial; + } + + void setArg1Fp(bool isArgFp) { + m_isArg1Fp = isArgFp; + } + void setArg2Fp(bool isArgFp) { + m_isArg2Fp = isArgFp; + } + void setArg3Fp(bool isArgFp) { + m_isArg3Fp = isArgFp; + } + void setArg4Fp(bool isArgFp) { + m_isArg4Fp = isArgFp; + } + +private: + union { + struct { + uint16_t m_cmdId; + uint16_t m_isArg1Fp : 1; + uint16_t m_isArg2Fp : 1; + uint16_t m_isArg3Fp : 1; + uint16_t m_isArg4Fp : 1; + }; + uint32_t m_threadCreateCmdProps; + }; + bool m_bCmdAtomic = false; + uint32_t m_rtnArgCnt = 3; + NmcAppEngine m_appEng = NmcAppEngInvalid; + bool m_bEventData = false; + uint64_t m_eventData = 0; + bool m_bNonBlocking = false; + NmcAmoMode m_amoMode = NmcAmoAqRlMode; + bool m_bNoFlush = false; + bool m_bNoInvalidate = false; + bool m_bBusyFail = false; + bool m_bNonTemporal = false; + bool m_bSpacial = false; +}; + +class NmcTeAppEng : public NmcTeCmdProps { +public: + NmcTeAppEng() : NmcTeCmdProps() + { + setAppEngine(NmcAppEngTe); + } +}; + +class NmcTeRtnArgCnt : public NmcTeCmdProps { +public: + NmcTeRtnArgCnt(uint32_t rtnArgCnt) : NmcTeCmdProps() + { + setRtnArgCnt(rtnArgCnt); + } +}; + +class NmcTeNonBlocking : public NmcTeCmdProps { +public: + NmcTeNonBlocking() : NmcTeCmdProps() + { + setNonBlocking(true); + } +}; + +class NmcTeEventData : public NmcTeCmdProps { +public: + NmcTeEventData(uint64_t data = 0) : NmcTeCmdProps() { + setEventData(data); + } +}; + +template<typename T> +uint64_t extendArg(T arg) +{ + static_assert((std::is_integral<T>::value || std::is_pointer<T>::value || std::is_floating_point<T>::value) && + sizeof(T) <= sizeof(uint64_t)); + if (std::is_floating_point<T>::value) { + uint64_t rtn; + switch (sizeof(T)) { + case 4: + rtn = fmv_x_s(*(float*)&arg); + break; + case 8: + rtn = fmv_x_d(*(double*)&arg); + break; + default: + rtn = 0; + assert(0 && "Invalid 
Type"); + } + return rtn; + } else + return std::is_signed<T>::value ? (int64_t)arg : (uint64_t)arg; +} + +// Store the 64-bit value u64 into *arg: floating-point T receives the raw bytes of u64, any other T receives u64 cast to T. +template<typename T> +void moveArg(T *arg, uint64_t u64) { + static_assert((std::is_integral<T>::value || std::is_pointer<T>::value || std::is_floating_point<T>::value) && + sizeof(T) <= sizeof(uint64_t)); + if (std::is_floating_point<T>::value) + // Copy only sizeof(T) bytes: T may be a 4-byte float, and copying sizeof(uint64_t) would write past the end of *arg. + memcpy(arg, &u64, sizeof(T)); + else + *arg = (T)u64; +} + +// fiber creation + +template<typename TG = uint64_t, typename F = uint64_t> +int nmcFiberCreate(NmcTeCmdProps cmdProps, TG pTarget, F pFunc) +{ + uint64_t seCmdProps = cmdProps.getThreadCreateCmdProps(); + uint64_t teCmdProps = cmdProps.getThreadCreateCmdProps(); + + int64_t status = NMC_SUCCESS; + switch (cmdProps.getAppEngine()) { + case NmcAppEngTe: + if (cmdProps.isBusyFail()) { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c0_r0_nf((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 1: status = nmc_xfc_te_bf_c0_r1_nf((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 2: status = nmc_xfc_te_bf_c0_r2_nf((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 3: status = nmc_xfc_te_bf_c0_nr_nf((void *)pTarget, (void *)pFunc, teCmdProps); break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c0_r0((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 1: status = nmc_xfc_te_bf_c0_r1((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 2: status = nmc_xfc_te_bf_c0_r2((void *)pTarget, (void *)pFunc, teCmdProps); break; + case 3: status = nmc_xfc_te_bf_c0_nr((void *)pTarget, (void *)pFunc, teCmdProps); break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } + } else { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r0_nf_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r0_nf((void *)pTarget, (void
*)pFunc, teCmdProps); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r1_nf_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r1_nf((void *)pTarget, (void *)pFunc, teCmdProps); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r2_nf_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r2_nf((void *)pTarget, (void *)pFunc, teCmdProps); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_nr_nf_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_nr_nf((void *)pTarget, (void *)pFunc, teCmdProps); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r0_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r0((void *)pTarget, (void *)pFunc, teCmdProps); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r1_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r1((void *)pTarget, (void *)pFunc, teCmdProps); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_r2_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_r2((void *)pTarget, (void *)pFunc, teCmdProps); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c0_nr_at((void *)pTarget, (void *)pFunc, teCmdProps); + else + nmc_xfc_te_c0_nr((void *)pTarget, (void *)pFunc, teCmdProps); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + } + break; + case NmcAppEngSe: + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c0_r0_nf((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 1: + nmc_xfc_se_c0_r1_nf((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 2: + nmc_xfc_se_c0_r2_nf((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 3: + nmc_xfc_se_c0_nr_nf((void *)pTarget, (void *)pFunc, seCmdProps); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } 
else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c0_r0((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 1: + nmc_xfc_se_c0_r1((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 2: + nmc_xfc_se_c0_r2((void *)pTarget, (void *)pFunc, seCmdProps); + break; + case 3: + nmc_xfc_se_c0_nr((void *)pTarget, (void *)pFunc, seCmdProps); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + break; + default: + return NMC_INV_APP_ENG; + } + return status; +} + +template<typename T1 = uint64_t> +int nmcFiberCreate(NmcTeCmdProps cmdProps, void *pTarget, void *pFunc, + T1 arg1) +{ + uint64_t a1 = extendArg(arg1); + + uint64_t seCmdProps = cmdProps.getThreadCreateCmdProps(); + + cmdProps.setArg1Fp(std::is_floating_point<T1>::value); + uint64_t teCmdProps = cmdProps.getThreadCreateCmdProps(); + + int64_t status = NMC_SUCCESS; + switch (cmdProps.getAppEngine()) { + case NmcAppEngTe: + if (cmdProps.isBusyFail()) { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c1_r0_nf(pTarget, pFunc, teCmdProps, a1); break; + case 1: status = nmc_xfc_te_bf_c1_r1_nf(pTarget, pFunc, teCmdProps, a1); break; + case 2: status = nmc_xfc_te_bf_c1_r2_nf(pTarget, pFunc, teCmdProps, a1); break; + case 3: status = nmc_xfc_te_bf_c1_nr_nf(pTarget, pFunc, teCmdProps, a1); break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c1_r0(pTarget, pFunc, teCmdProps, a1); break; + case 1: status = nmc_xfc_te_bf_c1_r1(pTarget, pFunc, teCmdProps, a1); break; + case 2: status = nmc_xfc_te_bf_c1_r2(pTarget, pFunc, teCmdProps, a1); break; + case 3: status = nmc_xfc_te_bf_c1_nr(pTarget, pFunc, teCmdProps, a1); break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } + } else { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r0_nf_at(pTarget, pFunc, teCmdProps, a1); + else + 
nmc_xfc_te_c1_r0_nf(pTarget, pFunc, teCmdProps, a1); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r1_nf_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_r1_nf(pTarget, pFunc, teCmdProps, a1); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r2_nf_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_r2_nf(pTarget, pFunc, teCmdProps, a1); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_nr_nf_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_nr_nf(pTarget, pFunc, teCmdProps, a1); + break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r0_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_r0(pTarget, pFunc, teCmdProps, a1); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r1_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_r1(pTarget, pFunc, teCmdProps, a1); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_r2_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_r2(pTarget, pFunc, teCmdProps, a1); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c1_nr_at(pTarget, pFunc, teCmdProps, a1); + else + nmc_xfc_te_c1_nr(pTarget, pFunc, teCmdProps, a1); + break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } + } + break; + case NmcAppEngSe: + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c1_r0_nf(pTarget, pFunc, seCmdProps, a1); + break; + case 1: + nmc_xfc_se_c1_r1_nf(pTarget, pFunc, seCmdProps, a1); + break; + case 2: + nmc_xfc_se_c1_r2_nf(pTarget, pFunc, seCmdProps, a1); + break; + case 3: + nmc_xfc_se_c1_nr_nf(pTarget, pFunc, seCmdProps, a1); + break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c1_r0(pTarget, pFunc, seCmdProps, a1); + break; + case 1: + nmc_xfc_se_c1_r1(pTarget, pFunc, seCmdProps, a1); + break; + 
case 2: + nmc_xfc_se_c1_r2(pTarget, pFunc, seCmdProps, a1); + break; + case 3: + nmc_xfc_se_c1_nr(pTarget, pFunc, seCmdProps, a1); + break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } + break; + default: + return NMC_INV_APP_ENG; + } + return status; +} + +template<typename T1 = uint64_t, typename T2 = uint64_t> +int nmcFiberCreate(NmcTeCmdProps cmdProps, void *pTarget, void *pFunc, + T1 arg1, T2 arg2) +{ + uint64_t a1 = extendArg(arg1); + uint64_t a2 = extendArg(arg2); + + uint64_t seCmdProps = cmdProps.getThreadCreateCmdProps(); + + cmdProps.setArg1Fp(std::is_floating_point<T1>::value); + cmdProps.setArg2Fp(std::is_floating_point<T2>::value); + uint64_t teCmdProps = cmdProps.getThreadCreateCmdProps(); + + int64_t status = NMC_SUCCESS; + switch (cmdProps.getAppEngine()) { + case NmcAppEngTe: + if (cmdProps.isBusyFail()) { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c2_r0_nf(pTarget, pFunc, teCmdProps, a1, a2); break; + case 1: status = nmc_xfc_te_bf_c2_r1_nf(pTarget, pFunc, teCmdProps, a1, a2); break; + case 2: status = nmc_xfc_te_bf_c2_r2_nf(pTarget, pFunc, teCmdProps, a1, a2); break; + case 3: status = nmc_xfc_te_bf_c2_nr_nf(pTarget, pFunc, teCmdProps, a1, a2); break; + default:status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c2_r0(pTarget, pFunc, teCmdProps, a1, a2); break; + case 1: status = nmc_xfc_te_bf_c2_r1(pTarget, pFunc, teCmdProps, a1, a2); break; + case 2: status = nmc_xfc_te_bf_c2_r2(pTarget, pFunc, teCmdProps, a1, a2); break; + case 3: status = nmc_xfc_te_bf_c2_nr(pTarget, pFunc, teCmdProps, a1, a2); break; + default:status = NMC_INV_RTN_ARG_CNT; + } + } + } else { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r0_nf_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r0_nf(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 1: + if 
(cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r1_nf_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r1_nf(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r2_nf_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r2_nf(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_nr_nf_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_nr_nf(pTarget, pFunc, teCmdProps, a1, a2); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r0_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r0(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r1_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r1(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_r2_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_r2(pTarget, pFunc, teCmdProps, a1, a2); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c2_nr_at(pTarget, pFunc, teCmdProps, a1, a2); + else + nmc_xfc_te_c2_nr(pTarget, pFunc, teCmdProps, a1, a2); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + } + break; + case NmcAppEngSe: + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c2_r0_nf(pTarget, pFunc, seCmdProps, a1, a2); + break; + case 1: + nmc_xfc_se_c2_r1_nf(pTarget, pFunc, seCmdProps, a1, a2); + break; + case 2: + nmc_xfc_se_c2_r2_nf(pTarget, pFunc, seCmdProps, a1, a2); + break; + case 3: + nmc_xfc_se_c2_nr_nf(pTarget, pFunc, seCmdProps, a1, a2); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c2_r0(pTarget, pFunc, seCmdProps, a1, a2); + break; + case 1: + nmc_xfc_se_c2_r1(pTarget, pFunc, seCmdProps, a1, a2); + 
break; + case 2: + nmc_xfc_se_c2_r2(pTarget, pFunc, seCmdProps, a1, a2); + break; + case 3: + nmc_xfc_se_c2_nr(pTarget, pFunc, seCmdProps, a1, a2); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + break; + default: + return NMC_INV_APP_ENG; + } + return status; +} + +template<typename T1 = uint64_t, typename T2 = uint64_t, + typename T3 = uint64_t, typename T4 = uint64_t> + int64_t nmcFiberCreate(NmcTeCmdProps &cmdProps, void *pTarget, void *pFunc, + T1 arg1, T2 arg2, T3 arg3, T4 arg4 = 0) +{ + uint64_t a1 = extendArg(arg1); + uint64_t a2 = extendArg(arg2); + uint64_t a3 = extendArg(arg3); + uint64_t a4 = extendArg(arg4); + + uint64_t seCmdProps = cmdProps.getThreadCreateCmdProps(); + + cmdProps.setArg1Fp(std::is_floating_point<T1>::value); + cmdProps.setArg2Fp(std::is_floating_point<T2>::value); + cmdProps.setArg3Fp(std::is_floating_point<T3>::value); + cmdProps.setArg4Fp(std::is_floating_point<T4>::value); + uint64_t teCmdProps = cmdProps.getThreadCreateCmdProps(); + + int64_t status = NMC_SUCCESS; + switch (cmdProps.getAppEngine()) { + case NmcAppEngTe: + if (cmdProps.isBusyFail()) { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c4_r0_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 1: status = nmc_xfc_te_bf_c4_r1_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 2: status = nmc_xfc_te_bf_c4_r2_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 3: status = nmc_xfc_te_bf_c4_nr_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + default:status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: status = nmc_xfc_te_bf_c4_r0(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 1: status = nmc_xfc_te_bf_c4_r1(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 2: status = nmc_xfc_te_bf_c4_r2(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); break; + case 3: status = nmc_xfc_te_bf_c4_nr(pTarget, pFunc, teCmdProps, a1, a2, 
a3, a4); break; + default: status = NMC_INV_RTN_ARG_CNT; + } + } + } else { + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r0_nf_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r0_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r1_nf_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r1_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r2_nf_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r2_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_nr_nf_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_nr_nf(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r0_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r0(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 1: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r1_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r1(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 2: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_r2_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_r2(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + case 3: + if (cmdProps.isCmdAtomic()) + nmc_xfc_te_c4_nr_at(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + else + nmc_xfc_te_c4_nr(pTarget, pFunc, teCmdProps, a1, a2, a3, a4); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + } + break; + case NmcAppEngSe: + if (cmdProps.isNoFlush()) { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c4_r0_nf(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + 
case 1: + nmc_xfc_se_c4_r1_nf(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + case 2: + nmc_xfc_se_c4_r2_nf(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + case 3: + nmc_xfc_se_c4_nr_nf(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } else { + switch (cmdProps.getRtnArgCnt()) { + case 0: + nmc_xfc_se_c4_r0(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + case 1: + nmc_xfc_se_c4_r1(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + case 2: + nmc_xfc_se_c4_r2(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + case 3: + nmc_xfc_se_c4_nr(pTarget, pFunc, seCmdProps, a1, a2, a3, a4); + break; + default: + status = NMC_INV_RTN_ARG_CNT; + } + } + break; + default: + return NMC_INV_APP_ENG; + } + return status; +} + +inline int nmcFiberJoin(NmcTeCmdProps const &cmdProps, uint16_t *_cmdId) +{ + int64_t cmdId; + if (cmdProps.isNoInvalidate()) { + if (cmdProps.isNonBlocking()) + nmc_xfj_r0_nb_ni(&cmdId); + else + nmc_xfj_r0_ni(&cmdId); + } else { + if (cmdProps.isNonBlocking()) + nmc_xfj_r0_nb(&cmdId); + else + nmc_xfj_r0(&cmdId); + } + *_cmdId = cmdId; + return cmdId < 0 ? NMC_RETRY : NMC_SUCCESS; +} + +template<typename T1 = uint64_t> +inline int nmcFiberJoin(NmcTeCmdProps const &cmdProps, uint16_t *_cmdId, T1 *ret1) +{ + int64_t cmdId; + uint64_t a1; + if (cmdProps.isNoInvalidate()) { + if (cmdProps.isNonBlocking()) + nmc_xfj_r1_nb_ni(&cmdId, &a1); + else + nmc_xfj_r1_ni(&cmdId, &a1); + } else { + if (cmdProps.isNonBlocking()) + nmc_xfj_r1_nb(&cmdId, &a1); + else + nmc_xfj_r1(&cmdId, &a1); + } + *_cmdId = cmdId; + moveArg(ret1, a1); + return cmdId < 0 ? 
NMC_RETRY : NMC_SUCCESS; +} + +template<typename T1 = uint64_t, typename T2 = uint64_t> +inline int nmcFiberJoin(NmcTeCmdProps const &cmdProps, uint16_t *_cmdId, T1 *ret1, T2 *ret2) +{ + int64_t cmdId; + uint64_t a1; + uint64_t a2; + if (cmdProps.isNoInvalidate()) { + if (cmdProps.isNonBlocking()) + nmc_xfj_r2_nb_ni(&cmdId, &a1, &a2); + else + nmc_xfj_r2_ni(&cmdId, &a1, &a2); + } else { + if (cmdProps.isNonBlocking()) + nmc_xfj_r2_nb(&cmdId, &a1, &a2); + else + nmc_xfj_r2(&cmdId, &a1, &a2); + } + *_cmdId = cmdId; + moveArg(ret1, a1); + moveArg(ret2, a2); + return cmdId < 0 ? NMC_RETRY : NMC_SUCCESS; +} + +inline int64_t nmcFiberJoinAll(NmcTeCmdProps const &cmdProps) +{ + int rtn; + if (cmdProps.isNoInvalidate()) { + if (cmdProps.isNonBlocking()) + nmc_xfja_ni_nb(); + else + nmc_xfja_ni(); + } else { + if (cmdProps.isNonBlocking()) + nmc_xfja_nb(); + else + nmc_xfja(); + } + return rtn; +} + +inline void nmcReturn() +{ + nmc_xtr_r0(); +} + +template<typename T1 = uint64_t> +inline void nmcReturn(T1 arg1) +{ + uint64_t a1 = extendArg(arg1); + + nmc_xtr_r1(a1); +} + +template<typename T1 = uint64_t, typename T2 = uint64_t> +inline void nmcReturn(T1 arg1, T2 arg2) +{ + uint64_t a1 = extendArg(arg1); + uint64_t a2 = extendArg(arg2); + + nmc_xtr_r2(a1, a2); +} + +inline double nmcGetAttachedTimeNs() +{ + return nmc_csr_rdtime(); +} + +// +// nmcPrintf +// NMC TE thread safe version of printf. +// +// arguments +// fmt format string use to print the additional arguments +// ... the additional arguments. +// +// return +// Upon successful return, this function returns the number of +// characters printted. 
+// +#define nmcPrintf nmc_te_printf +#define nmcPrintfTime nmc_te_printf_time + +// +// Event routines +// +enum NmcEventReceiveMode { UndefinedMode, SimpleMode, CollectiveMode, BroadcastMode }; +inline void nmcSetEventReceiveMode(NmcEventReceiveMode mode, uint64_t eventNum, uint64_t eventCountOrRecvChan = 0) { + switch (mode) { + case SimpleMode: + nmc_xem_sm(eventNum); + break; + case CollectiveMode: + nmc_xem_cm(eventNum, eventCountOrRecvChan); + break; + case BroadcastMode: + nmc_xem_bm(eventNum, eventCountOrRecvChan); + break; + default: + assert(0 && "Invalid event receive mode"); + } +} + +inline uint64_t nmcEventGetDest(uint64_t eventNum) { + return nmc_xed(eventNum); +} + +inline void nmcEventSend(NmcTeCmdProps const &cmdProps, uint64_t eventDest) { + if (cmdProps.isNoFlush()) { + if (cmdProps.hasEventData()) + nmc_xes_d_nf(eventDest, cmdProps.getEventData()); + else + nmc_xes_nf(eventDest); + } else { + if (cmdProps.hasEventData()) + nmc_xes_d(eventDest, cmdProps.getEventData()); + else + nmc_xes(eventDest); + } +} + +inline void nmcEventBroadcast(NmcTeCmdProps const &cmdProps, uint64_t eventNum, uint64_t eventChan) { + if (cmdProps.isNoFlush()) { + if (cmdProps.hasEventData()) + nmc_xeb_d_nf(eventNum, eventChan, cmdProps.getEventData()); + else + nmc_xeb_nf(eventNum, eventChan); + } else { + if (cmdProps.hasEventData()) + nmc_xeb_d(eventNum, eventChan, cmdProps.getEventData()); + else + nmc_xeb(eventNum, eventChan); + } +} + +inline uint64_t nmcEventListen(NmcTeCmdProps const &cmdProps, uint64_t eventMask) { + if (cmdProps.isNonBlocking()) + return nmc_xel_nb(eventMask); + else + return nmc_xel(eventMask); +} + +template<typename T = uint64_t> +inline uint64_t nmcEventReceive(NmcTeCmdProps const &cmdProps, T eventNum) { + if (cmdProps.isNoInvalidate()) { + if (cmdProps.hasEventData()) + return nmc_xer_d_ni(eventNum); + else { + nmc_xer_ni(eventNum); + return 0; + } + } else { + if (cmdProps.hasEventData()) + return nmc_xer_d(eventNum); + else { + 
nmc_xer(eventNum); + return 0; + } + } +} + +// Atomics + +template<typename T = uint32_t> +inline T nmcAtomicAdd(NmcTeCmdProps const &cmdProps, T *pAddr, T value) { + if (cmdProps.isNonBlocking()) { + switch (sizeof(T)) { + case 4: + if (std::is_floating_point<T>::value) { + switch (cmdProps.getAmoMode()) { + default: nmc_amofadd_s_nb((volatile float *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofadd_s_aq_nb((volatile float *)pAddr, value); break; + case NmcAmoRlMode: nmc_amofadd_s_rl_nb((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofadd_s_aqrl_nb((volatile float *)pAddr, value); break; + } + } else { + switch (cmdProps.getAmoMode()) { + default: nmc_amoadd_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoadd_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoadd_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoadd_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + } + break; + case 8: + if (std::is_floating_point<T>::value) { + switch (cmdProps.getAmoMode()) { + default: nmc_amofadd_d_nb((volatile double *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofadd_d_aq_nb((volatile double *)pAddr, value); break; + case NmcAmoRlMode: nmc_amofadd_d_rl_nb((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofadd_d_aqrl_nb((volatile double *)pAddr, value); break; + } + } else { + switch (cmdProps.getAmoMode()) { + default: nmc_amoadd_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoadd_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoadd_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoadd_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + } + break; + default: + assert(0 && "Invalid type"); + } + return 0; + } else { + switch (sizeof(T)) { + case 4: + if (std::is_floating_point<T>::value) { + switch (cmdProps.getAmoMode()) { + default: return 
nmc_amofadd_s((volatile float *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amofadd_s_aq((volatile float *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofadd_s_rl((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofadd_s_aqrl((volatile float *)pAddr, value); break; + } + } else { + switch (cmdProps.getAmoMode()) { + default: return nmc_amoadd_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoadd_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amoadd_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amoadd_w_aqrl((volatile int32_t *)pAddr, value); break; + } + } + break; + case 8: + if (std::is_floating_point<T>::value) { + switch (cmdProps.getAmoMode()) { + default: return nmc_amofadd_d((volatile double *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amofadd_d_aq((volatile double *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofadd_d_rl((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofadd_d_aqrl((volatile double *)pAddr, value); break; + } + } else { + switch (cmdProps.getAmoMode()) { + default: return nmc_amoadd_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoadd_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amoadd_d_rl((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amoadd_d_aqrl((volatile int64_t *)pAddr, value); break; + } + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } +} + +template<typename T = uint32_t> +inline T nmcAtomicMin(NmcTeCmdProps const &cmdProps, T *pAddr, T value) { + if (cmdProps.isNonBlocking()) { + if (std::is_floating_point<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amofmin_s_nb((volatile float *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofmin_s_aq_nb((volatile float *)pAddr, value); 
break; + case NmcAmoRlMode: nmc_amofmin_s_rl_nb((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofmin_s_aqrl_nb((volatile float *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amofmin_d_nb((volatile double *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofmin_d_aq_nb((volatile double *)pAddr, value); break; + case NmcAmoRlMode: nmc_amofmin_d_rl_nb((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofmin_d_aqrl_nb((volatile double *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } else if (std::is_unsigned<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amominu_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amominu_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amominu_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amominu_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amominu_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amominu_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amominu_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amominu_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amomin_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amomin_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomin_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomin_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amomin_d_nb((volatile int64_t *)pAddr, 
value); break; + case NmcAmoAqMode: nmc_amomin_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomin_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomin_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } + return 0; + } else { + if (std::is_floating_point<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amofmin_s((volatile float *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amofmin_s_aq((volatile float *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofmin_s_rl((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofmin_s_aqrl((volatile float *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amofmin_d((volatile double *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amofmin_d_aq((volatile double *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofmin_d_rl((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofmin_d_aqrl((volatile double *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } else if (std::is_unsigned<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amominu_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amominu_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amominu_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amominu_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amominu_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amominu_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amominu_d_rl((volatile int64_t *)pAddr, 
value); break; + case NmcAmoAqRlMode: return nmc_amominu_d_aqrl((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomin_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomin_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomin_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amomin_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomin_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomin_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomin_d_rl((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amomin_d_aqrl((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } + } +} + +template<typename T = uint32_t> +inline T nmcAtomicMax(NmcTeCmdProps const &cmdProps, T *pAddr, T value) { + if (cmdProps.isNonBlocking()) { + if (std::is_floating_point<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amofmax_s_nb((volatile float *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofmax_s_aq_nb((volatile float *)pAddr, value); break; + case NmcAmoRlMode: nmc_amofmax_s_rl_nb((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofmax_s_aqrl_nb((volatile float *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amofmax_d_nb((volatile double *)pAddr, value); break; + case NmcAmoAqMode: nmc_amofmax_d_aq_nb((volatile double *)pAddr, value); break; + case NmcAmoRlMode: nmc_amofmax_d_rl_nb((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amofmax_d_aqrl_nb((volatile double *)pAddr, 
value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } else if (std::is_unsigned<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amomaxu_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amomaxu_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomaxu_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomaxu_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amomaxu_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amomaxu_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomaxu_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomaxu_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amomax_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amomax_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomax_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomax_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amomax_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amomax_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amomax_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amomax_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + } + return 0; + } else { + if (std::is_floating_point<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amofmax_s((volatile float *)pAddr, value); break; + case 
NmcAmoAqMode: return nmc_amofmax_s_aq((volatile float *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofmax_s_rl((volatile float *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofmax_s_aqrl((volatile float *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amofmax_d((volatile double *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amofmax_d_aq((volatile double *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amofmax_d_rl((volatile double *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amofmax_d_aqrl((volatile double *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } else if (std::is_unsigned<T>::value) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomaxu_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomaxu_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomaxu_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amomaxu_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomaxu_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomaxu_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomaxu_d_rl((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amomaxu_d_aqrl((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomax_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomax_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomax_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return 
nmc_amomax_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amomax_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amomax_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amomax_d_rl((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amomax_d_aqrl((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } + } +} + +template<typename T = uint32_t> +inline T nmcAtomicXor(NmcTeCmdProps const &cmdProps, T *pAddr, T value) { + if (cmdProps.isNonBlocking()) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amoxor_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoxor_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoxor_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoxor_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amoxor_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoxor_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoxor_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoxor_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + return 0; + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amoxor_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoxor_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amoxor_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amoxor_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: 
return nmc_amoxor_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoxor_d_aq((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amoxor_d_rl((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amoxor_d_aqrl((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + return 0; + } + } +} + +template<typename T = uint32_t> +inline T nmcAtomicOr(NmcTeCmdProps const &cmdProps, T *pAddr, T value) { + if (cmdProps.isNonBlocking()) { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: nmc_amoor_w_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoor_w_aq_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoor_w_rl_nb((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoor_w_aqrl_nb((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: nmc_amoor_d_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: nmc_amoor_d_aq_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoRlMode: nmc_amoor_d_rl_nb((volatile int64_t *)pAddr, value); break; + case NmcAmoAqRlMode: nmc_amoor_d_aqrl_nb((volatile int64_t *)pAddr, value); break; + } + break; + default: + assert(0 && "Invalid type"); + } + return 0; + } else { + switch (sizeof(T)) { + case 4: + switch (cmdProps.getAmoMode()) { + default: return nmc_amoor_w((volatile int32_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoor_w_aq((volatile int32_t *)pAddr, value); break; + case NmcAmoRlMode: return nmc_amoor_w_rl((volatile int32_t *)pAddr, value); break; + case NmcAmoAqRlMode: return nmc_amoor_w_aqrl((volatile int32_t *)pAddr, value); break; + } + break; + case 8: + switch (cmdProps.getAmoMode()) { + default: return nmc_amoor_d((volatile int64_t *)pAddr, value); break; + case NmcAmoAqMode: return nmc_amoor_d_aq((volatile int64_t *)pAddr, value); break; + 
case NmcAmoRlMode: return nmc_amoor_d_rl((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: return nmc_amoor_d_aqrl((volatile int64_t *)pAddr, value); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+            return 0;
+        }
+    }
+}
+/**
+ * Atomically ANDs `value` into the 4- or 8-byte object at `pAddr`.
+ *
+ * The intrinsic is selected from three things:
+ *   - cmdProps.isNonBlocking(): true selects the `_nb` intrinsics;
+ *   - sizeof(T): 4 selects the `_w` (int32_t) forms, 8 the `_d` (int64_t) forms;
+ *   - cmdProps.getAmoMode(): NmcAmoAqMode / NmcAmoRlMode / NmcAmoAqRlMode select
+ *     the `_aq` / `_rl` / `_aqrl` forms, any other mode the unsuffixed form.
+ *
+ * @param cmdProps  supplies the blocking flag and the AMO mode
+ * @param pAddr     target address, cast to volatile int32_t* or int64_t*
+ * @param value     operand handed to the intrinsic
+ * @return 0 on the non-blocking path (the intrinsic's result is not used);
+ *         otherwise the value returned by the blocking intrinsic
+ *         (presumably the previous memory contents -- confirm against the
+ *         intrinsic definitions). Any other sizeof(T) trips the assert; the
+ *         blocking path then returns 0.
+ */
+template<typename T = uint32_t>
+inline T nmcAtomicAnd(NmcTeCmdProps const &cmdProps, T *pAddr, T value) {
+    if (cmdProps.isNonBlocking()) {  // result of the _nb intrinsic is discarded
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amoand_w_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqMode: nmc_amoand_w_aq_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoRlMode: nmc_amoand_w_rl_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: nmc_amoand_w_aqrl_nb((volatile int32_t *)pAddr, value); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amoand_d_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqMode: nmc_amoand_d_aq_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoRlMode: nmc_amoand_d_rl_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: nmc_amoand_d_aqrl_nb((volatile int64_t *)pAddr, value); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+        }
+        return 0;
+    } else {
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {  // every case returns; the break statements are never reached
+            default: return nmc_amoand_w((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqMode: return nmc_amoand_w_aq((volatile int32_t *)pAddr, value); break;
+            case NmcAmoRlMode: return nmc_amoand_w_rl((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: return nmc_amoand_w_aqrl((volatile int32_t *)pAddr, value); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: return nmc_amoand_d((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqMode: return nmc_amoand_d_aq((volatile int64_t *)pAddr, value); break;
+            case NmcAmoRlMode: return nmc_amoand_d_rl((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: return nmc_amoand_d_aqrl((volatile int64_t *)pAddr, value); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+            return 0;
+        }
+    }
+}
+/**
+ * Atomic swap on the 4- or 8-byte object at `pAddr`, using the nmc_amoswap_*
+ * intrinsics.
+ *
+ * Intrinsic selection follows cmdProps.isNonBlocking() (`_nb` forms),
+ * sizeof(T) (4 -> `_w`, 8 -> `_d`) and cmdProps.getAmoMode()
+ * (`_aq` / `_rl` / `_aqrl`, or the unsuffixed form for any other mode).
+ *
+ * @param cmdProps  supplies the blocking flag and the AMO mode
+ * @param pAddr     target address, cast to volatile int32_t* or int64_t*
+ * @param value     operand handed to the intrinsic
+ * @return 0 on the non-blocking path; otherwise the blocking intrinsic's
+ *         return value (presumably the previous memory contents -- confirm).
+ *         Any other sizeof(T) trips the assert; the blocking path then
+ *         returns 0.
+ */
+template<typename T = uint32_t>
+inline T nmcAtomicSwap(NmcTeCmdProps const &cmdProps, T *pAddr, T value) {
+    if (cmdProps.isNonBlocking()) {  // result of the _nb intrinsic is discarded
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amoswap_w_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqMode: nmc_amoswap_w_aq_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoRlMode: nmc_amoswap_w_rl_nb((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: nmc_amoswap_w_aqrl_nb((volatile int32_t *)pAddr, value); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amoswap_d_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqMode: nmc_amoswap_d_aq_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoRlMode: nmc_amoswap_d_rl_nb((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: nmc_amoswap_d_aqrl_nb((volatile int64_t *)pAddr, value); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+        }
+        return 0;
+    } else {
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {
+            default: return nmc_amoswap_w((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqMode: return nmc_amoswap_w_aq((volatile int32_t *)pAddr, value); break;
+            case NmcAmoRlMode: return nmc_amoswap_w_rl((volatile int32_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: return nmc_amoswap_w_aqrl((volatile int32_t *)pAddr, value); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: return nmc_amoswap_d((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqMode: return nmc_amoswap_d_aq((volatile int64_t *)pAddr, value); break;
+            case NmcAmoRlMode: return nmc_amoswap_d_rl((volatile int64_t *)pAddr, value); break;
+            case NmcAmoAqRlMode: return nmc_amoswap_d_aqrl((volatile int64_t *)pAddr, value); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+            return 0;
+        }
+    }
+}
+/**
+ * Atomic compare-and-swap on the 4- or 8-byte object at `pAddr`, using the
+ * nmc_amocas_* intrinsics; `cmpValue` and `swapValue` are passed through in
+ * that order.
+ *
+ * Intrinsic selection follows cmdProps.isNonBlocking() (`_nb` forms),
+ * sizeof(T) (4 -> `_w`, 8 -> `_d`) and cmdProps.getAmoMode()
+ * (`_aq` / `_rl` / `_aqrl`, or the unsuffixed form for any other mode).
+ *
+ * @param cmdProps   supplies the blocking flag and the AMO mode
+ * @param pAddr      target address, cast to volatile int32_t* or int64_t*
+ * @param cmpValue   second argument of the intrinsic
+ * @param swapValue  third argument of the intrinsic
+ * @return 0 on the non-blocking path; otherwise the blocking intrinsic's
+ *         return value (presumably the value observed at pAddr -- confirm).
+ *         Any other sizeof(T) trips the assert; the blocking path then
+ *         returns 0.
+ */
+template<typename T = uint32_t>
+inline T nmcAtomicCas(NmcTeCmdProps const &cmdProps, T *pAddr, T cmpValue, T swapValue) {
+    if (cmdProps.isNonBlocking()) {  // result of the _nb intrinsic is discarded
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amocas_w_nb((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqMode: nmc_amocas_w_aq_nb((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoRlMode: nmc_amocas_w_rl_nb((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqRlMode: nmc_amocas_w_aqrl_nb((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: nmc_amocas_d_nb((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqMode: nmc_amocas_d_aq_nb((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoRlMode: nmc_amocas_d_rl_nb((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqRlMode: nmc_amocas_d_aqrl_nb((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+        }
+        return 0;
+    } else {
+        switch (sizeof(T)) {
+        case 4:
+            switch (cmdProps.getAmoMode()) {
+            default: return nmc_amocas_w((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqMode: return nmc_amocas_w_aq((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoRlMode: return nmc_amocas_w_rl((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqRlMode: return nmc_amocas_w_aqrl((volatile int32_t *)pAddr, cmpValue, swapValue); break;
+            }
+            break;
+        case 8:
+            switch (cmdProps.getAmoMode()) {
+            default: return nmc_amocas_d((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqMode: return nmc_amocas_d_aq((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoRlMode: return nmc_amocas_d_rl((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            case NmcAmoAqRlMode: return nmc_amocas_d_aqrl((volatile int64_t *)pAddr, cmpValue, swapValue); break;
+            }
+            break;
+        default:
+            assert(0 && "Invalid type");
+            return 0;
+        }
+    }
+}
+/**
+ * Stores `data` to `*pAddr`, through an `_nt` intrinsic when
+ * cmdProps.isNonTemporal() is true.
+ *
+ * On the `_nt` path, floating-point T uses nmc_fsw_nt / nmc_fsd_nt
+ * (4 / 8 bytes) and every other T uses nmc_sb_nt / nmc_sh_nt / nmc_sw_nt /
+ * nmc_sd_nt (1 / 2 / 4 / 8 bytes), with `data` cast to the matching type.
+ * A size with no matching case trips the assert and falls out of the switch
+ * without storing. When isNonTemporal() is false the value is written with a
+ * plain assignment through the volatile pointer.
+ *
+ * @param cmdProps  supplies the non-temporal flag
+ * @param pAddr     destination
+ * @param data      value to store
+ */
+template<typename T = uint32_t>
+void nmcStore(NmcTeCmdProps const &cmdProps, T volatile *pAddr, T data)
+{
+    if (cmdProps.isNonTemporal()) {
+        if (std::is_floating_point<T>::value) {  // ordinary runtime `if`: both branches are compiled for every T
+            switch (sizeof(T)) {
+            case 4: nmc_fsw_nt((volatile float *)pAddr, (float)data); return;
+            case 8: nmc_fsd_nt((volatile double *)pAddr, (double)data); return;
+            default: assert(0 && "Invalid type");
+
+            }
+        } else {
+            switch (sizeof(T)) {
+            case 1: nmc_sb_nt((volatile uint8_t *)pAddr, (uint8_t)data); return;
+            case 2: nmc_sh_nt((volatile uint16_t *)pAddr, (uint16_t)data); return;
+            case 4: nmc_sw_nt((volatile uint32_t *)pAddr, (uint32_t)data); return;
+            case 8: nmc_sd_nt((volatile uint64_t *)pAddr, (uint64_t)data); return;
+            default: assert(0 && "Invalid type");
+            }
+        }
+    } else {
+        *pAddr = data;
+    }
+}
+/**
+ * Loads a T from `*pAddr`, choosing the access flavour from `cmdProps`.
+ *
+ * cmdProps.isNonTemporal() selects the `_nt` intrinsics; otherwise
+ * cmdProps.isSpacial() selects the `_sp` intrinsics; otherwise the value is
+ * read with a plain dereference of the volatile pointer. Within an intrinsic
+ * path the call is picked by type class and size:
+ *   - floating point: nmc_flw_* (4 bytes), nmc_fld_* (8 bytes);
+ *   - signed:         nmc_lb_* / nmc_lh_* / nmc_lw_* / nmc_ld_* (1/2/4/8);
+ *   - otherwise:      nmc_lbu_* / nmc_lhu_* / nmc_lwu_* (1/2/4) and nmc_ld_*
+ *                     for 8 bytes (there is no separate unsigned 8-byte call).
+ *
+ * @param cmdProps  supplies the non-temporal and "spacial" flags
+ * @param pAddr     source address
+ * @return the loaded value converted to T; a size with no matching case trips
+ *         the assert and falls through to the final `return 0`.
+ */
+template<typename T = uint32_t>
+T nmcLoad(NmcTeCmdProps const &cmdProps, T volatile *pAddr)
+{
+    if (cmdProps.isNonTemporal()) {
+        if (std::is_floating_point<T>::value) {
+            switch (sizeof(T)) {
+            case 4: return nmc_flw_nt((volatile float *)pAddr);
+            case 8: return nmc_fld_nt((volatile double *)pAddr);
+            default: assert(0 && "Invalid type");
+            }
+        } else {
+            if (std::is_signed<T>::value) {
+                switch (sizeof(T)) {
+                case 1: return nmc_lb_nt((volatile int8_t *)pAddr);
+                case 2: return nmc_lh_nt((volatile int16_t *)pAddr);
+                case 4: return nmc_lw_nt((volatile int32_t *)pAddr);
+                case 8: return nmc_ld_nt((volatile int64_t *)pAddr);
+                default: assert(0 && "Invalid type");
+                }
+            } else {
+                switch (sizeof(T)) {
+                case 1: return nmc_lbu_nt((volatile uint8_t *)pAddr);
+                case 2: return nmc_lhu_nt((volatile uint16_t *)pAddr);
+                case 4: return nmc_lwu_nt((volatile uint32_t *)pAddr);
+                case 8: return nmc_ld_nt((volatile int64_t *)pAddr);  // same call as the signed 8-byte case
+                default: assert(0 && "Invalid type");
+                }
+            }
+        }
+    } else if (cmdProps.isSpacial()) {
+        if (std::is_floating_point<T>::value) {
+            switch (sizeof(T)) {
+            case 4: return nmc_flw_sp((volatile float *)pAddr);
+            case 8: return nmc_fld_sp((volatile double *)pAddr);
+            default: assert(0 && "Invalid type");
+            }
+        } else {
+            if (std::is_signed<T>::value) {
+                switch (sizeof(T)) {
+                case 1: return nmc_lb_sp((volatile int8_t *)pAddr);
+                case 2: return nmc_lh_sp((volatile int16_t *)pAddr);
+                case 4: return nmc_lw_sp((volatile int32_t *)pAddr);
+                case 8: return nmc_ld_sp((volatile int64_t *)pAddr);
+                default: assert(0 && "Invalid type");
+                }
+            } else {
+                switch (sizeof(T)) {
+                case 1: return nmc_lbu_sp((volatile uint8_t *)pAddr);
+                case 2: return nmc_lhu_sp((volatile uint16_t *)pAddr);
+                case 4: return nmc_lwu_sp((volatile uint32_t *)pAddr);
+                case 8: return nmc_ld_sp((volatile int64_t *)pAddr);  // same call as the signed 8-byte case
+                default: assert(0 && "Invalid type");
+                }
+            }
+        }
+    } else {
+        return *pAddr;
+    }
+    return 0;  // reached only after a failed size assert above
+}
+/** Returns the result of the nmc_xid_dev() intrinsic. */
+inline uint64_t nmcGetDeviceId() {
+    return nmc_xid_dev();
+}
+/** Returns the result of the nmc_xid_te() intrinsic. */
+inline uint64_t nmcGetTeId() {
+    return nmc_xid_te();
+}
+/** Returns the result of the nmc_xid_core() intrinsic. */
+inline uint64_t nmcGetCoreId() {
+    return nmc_xid_core();
+}
+/** Returns the result of the nmc_xid_thrd() intrinsic. */
+inline uint64_t nmcGetThreadId() {
+    return nmc_xid_thrd();
+}
+/** Returns nmc_tzc(data) (presumably a trailing-zero count -- confirm against the intrinsic). */
+inline uint64_t nmcTzc(uint64_t data) {
+    return nmc_tzc(data);
+}
+/** Calls the nmc_xlp() intrinsic. */
+inline void nmcLowerThreadPriority() {
+    nmc_xlp();
+}
+/** Calls the nmc_xnp() intrinsic. */
+inline void nmcNormalThreadPriority() {
+    nmc_xnp();
+}
diff --git a/src/micron/nmc_te_intrin.h b/src/micron/nmc_te_intrin.h
new file mode 100644
index 00000000..83e08bb9
--- /dev/null
+++ b/src/micron/nmc_te_intrin.h
@@ -0,0 +1,2856 @@
+/*
+ * Copyright (C) 2024 Micron Technology, Inc.
+ *
+ * This file is the confidential and proprietary property of
+ * Micron Technology, Inc.
+ */
+#pragma once
+#include <stdint.h>
+#include "nmc_errno.h"
+
+/*
+ * c++ & __llvm__ does not support register keyword. c++ 17 also
+ * does not support the keyword register.
+ */
+#if defined(__cplusplus)
+    #if defined(__llvm__)
+        #define REG_NOT_SUPPORT 1
+    #endif
+    #if !defined(REG_NOT_SUPPORT) && __cplusplus >= 201703L
+        #define REG_NOT_SUPPORT 2
+    #endif
+#endif
+/*
+ * REGISTER and ASMR(x) spell the GNU explicit-register local syntax
+ * (register T v asm("reg")). When REG_NOT_SUPPORT is defined both expand to
+ * nothing, so the wrappers below then declare ordinary locals with no
+ * register binding.
+ * NOTE(review): the xfc asm templates below name no operands, so confirm how
+ * the arguments reach x10.. in a REG_NOT_SUPPORT build.
+ */
+#ifdef REG_NOT_SUPPORT
+#define REGISTER
+#define ASMR(x)
+#else
+#define REGISTER register
+#define ASMR(x) asm(x)
+#endif
+/*
+ * Replaces any earlier assert: a failing check calls __nmc_te_assert() with
+ * the file, line and expression text. NDEBUG is not consulted, and the macro
+ * expands to a statement rather than an expression.
+ */
+#undef assert
+#define assert(x) do { if (!(x)) __nmc_te_assert(__FILE__, __LINE__, #x); } while (0)
+/* INLINE can be pre-defined by the includer; the default is plain `inline`. */
+#ifndef INLINE
+#define INLINE inline
+#endif
+/* Assert hook and printf-style helpers; the printf attribute enables format checking. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+    int __nmc_te_assert(const char *file, int line, const char *exp);
+    int nmc_te_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+    int nmc_te_printf_time(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
+/*
+ * With ENABLE_EMULATION the extern "C" block is closed here (C++ only) and
+ * nmc_te_emu_intrin.h is included in place of the inline-asm definitions in
+ * the #else branch.
+ */
+#ifdef ENABLE_EMULATION
+    #ifdef __cplusplus
+    }
+    #endif
+    #include "nmc_te_emu_intrin.h"
+#else
+    /* Executes fmv.x.s with `din` in an FP register ("f") and returns the integer-register result ("=r"). */
+    INLINE uint64_t fmv_x_s(float din)
+    {
+        uint64_t dout;
+        asm("fmv.x.s %0,%1" : "=r" (dout) : "f" (din));
+        return dout;
+    }
+    /* Executes fmv.x.d with `din` in an FP register ("f") and returns the integer-register result ("=r"). */
+    INLINE uint64_t fmv_x_d(double din)
+    {
+        uint64_t dout;
+        asm("fmv.x.d %0,%1" : "=r" (dout) : "f" (din));
+        return dout;
+    }
+
+
+    // XFC C0
+    /*
+     * C0 wrappers take three inputs -- daddr, pc and cmdProps -- bound to
+     * x10, x11 and x12. The asm templates contain no operand placeholders;
+     * the "r" inputs only keep the bound locals live across the instruction.
+     * The r0/r1/r2/nr and _at suffixes select the mnemonic that is emitted;
+     * their hardware meaning is not visible in this header (TODO: cite the
+     * NMC ISA reference).
+     */
+    INLINE void nmc_xfc_te_c0_r0(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r0_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r0.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r1(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r1_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r1.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r2(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r2_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r2.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_nr(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_nr_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.nr.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    // XFC C0 NF
+    /* Same signatures and register bindings as the C0 wrappers; only the mnemonic gains a .nf suffix. */
+    INLINE void nmc_xfc_te_c0_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r0_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r0.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r1_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r1.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_r2_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.r2.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    INLINE void nmc_xfc_te_c0_nr_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        asm volatile("xfc.te.c0.nr.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps));
+    }
+
+    // XFC C0 BF
+    /*
+     * BF wrappers take the same inputs and add `status`, an output bound to
+     * x10 -- the same register daddr is passed in through. The status value
+     * is returned to the caller; its encoding is not defined in this chunk.
+     */
+    INLINE int64_t nmc_xfc_te_bf_c0_r0(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r0" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_r1(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r1" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_r2(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r2" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_nr(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.nr" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    // XFC C0 NF BF
+    /* BF wrappers with the .nf mnemonic suffix; same bindings and status return as the C0 BF group. */
+    INLINE int64_t nmc_xfc_te_bf_c0_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r0.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r1.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.r2.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c0_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c0.nr.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps));
+        return status;
+    }
+
+    // XFC C1
+    /* C1 wrappers add one 64-bit argument, a1, bound to x13; otherwise identical in shape to the C0 group. */
+    INLINE void nmc_xfc_te_c1_r0(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r0_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r0.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r1(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r1_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r1.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r2(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r2_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r2.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_nr(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_nr_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.nr.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    // XFC C1 NF
+    /* C1 wrappers with the .nf mnemonic suffix; a1 is still bound to x13. */
+    INLINE void nmc_xfc_te_c1_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r0_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r0.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r1_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r1.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+    /* Remaining C1 NF wrappers: daddr/pc/cmdProps/a1 bound to x10-x13 (through the REGISTER/ASMR macros). */
+    INLINE void nmc_xfc_te_c1_r2_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.r2.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    INLINE void nmc_xfc_te_c1_nr_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        asm volatile("xfc.te.c1.nr.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+    }
+
+    // XFC C1 BF
+    /*
+     * C1 BF wrappers: inputs as for the C1 group, plus `status`, an output
+     * bound to x10 (the register daddr is passed in through), which is
+     * returned. The status encoding is not defined in this chunk.
+     */
+    INLINE int64_t nmc_xfc_te_bf_c1_r0(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r0" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_r1(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r1" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_r2(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r2" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_nr(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.nr" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    // XFC C1 NF BF
+    /* C1 BF wrappers with the .nf mnemonic suffix. */
+    INLINE int64_t nmc_xfc_te_bf_c1_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r0.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r1.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.r2.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c1_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c1.nr.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1));
+        return status;
+    }
+
+    // XFC C2
+    /*
+     * C2 wrappers take two 64-bit arguments, a1 and a2, bound to x13 and x14,
+     * after daddr (x10), pc (x11) and cmdProps (x12). The asm templates name
+     * no operands; the "r" inputs only keep the bound locals live. The
+     * r0/r1/r2/nr and _at suffixes select the mnemonic; their hardware
+     * meaning is not visible here (TODO: cite the NMC ISA reference).
+     */
+    INLINE void nmc_xfc_te_c2_r0(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r0_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r0.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r1(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r1_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r1.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r2(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r2_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r2.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_nr(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_nr_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.nr.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    // XFC C2 NF
+    /* C2 wrappers with the .nf mnemonic suffix; a1/a2 are still bound to x13/x14. */
+    INLINE void nmc_xfc_te_c2_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r0_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r0.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r1_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r1.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_r2_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.r2.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    INLINE void nmc_xfc_te_c2_nr_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        asm volatile("xfc.te.c2.nr.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+    }
+
+    // XFC C2 BF
+    /* C2 BF wrappers: C2 inputs plus the `status` output bound to x10, which is returned. */
+    INLINE int64_t nmc_xfc_te_bf_c2_r0(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r0" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_r1(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r1" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_r2(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r2" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_nr(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.nr" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    // XFC C2 NF BF
+    /* C2 BF wrappers with the .nf mnemonic suffix. */
+    INLINE int64_t nmc_xfc_te_bf_c2_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r0.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r1.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.r2.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    INLINE int64_t nmc_xfc_te_bf_c2_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps,
+            uint64_t _a1, uint64_t _a2)
+    {
+        REGISTER void *daddr ASMR("x10") = _daddr;
+        REGISTER void *pc ASMR("x11") = _pc;
+        REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps;
+        REGISTER uint64_t a1 ASMR("x13") = _a1;
+        REGISTER uint64_t a2 ASMR("x14") = _a2;
+        REGISTER int64_t status ASMR("x10");
+        asm volatile("xfc.te.bf.c2.nr.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2));
+        return status;
+    }
+
+    // XFC C4
+
+
INLINE void nmc_xfc_te_c4_r0(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r0_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r0.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r1(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r1_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 
ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r1.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r2(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r2_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r2.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_nr(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void 
nmc_xfc_te_c4_nr_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.nr.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + // XFC C4 + + INLINE void nmc_xfc_te_c4_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r0_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r0.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + 
REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r1_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r1.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_r2_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.r2.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" 
(a4)); + } + + INLINE void nmc_xfc_te_c4_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_te_c4_nr_nf_at(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.te.c4.nr.nf.at" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + // XFC C4 BF + + INLINE int64_t nmc_xfc_te_bf_c4_r0(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r0" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_r1(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = 
_daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r1" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_r2(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r2" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_nr(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.nr" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + // XFC C4 NF BF + + INLINE int64_t nmc_xfc_te_bf_c4_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; 
+ REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r0.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r1.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.r2.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + INLINE int64_t nmc_xfc_te_bf_c4_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = 
_cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + REGISTER int64_t status ASMR("x10"); + asm volatile("xfc.te.bf.c4.nr.nf" : "=r" (status) : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + return status; + } + + // XFC C0 + + INLINE void nmc_xfc_se_c0_r0(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_r1(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_r2(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_nr(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + // XFC C0 NF + + INLINE void nmc_xfc_se_c0_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER 
void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + INLINE void nmc_xfc_se_c0_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + asm volatile("xfc.se.c0.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps)); + } + + // XFC C1 + + INLINE void nmc_xfc_se_c1_r0(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_r1(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_r2(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_nr(void *_daddr, void *_pc, uint64_t 
_cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + // XFC C1 NF + + INLINE void nmc_xfc_se_c1_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + INLINE void nmc_xfc_se_c1_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + asm volatile("xfc.se.c1.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1)); + } + + // XFC C2 + + INLINE void nmc_xfc_se_c2_r0(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc 
ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + INLINE void nmc_xfc_se_c2_r1(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + INLINE void nmc_xfc_se_c2_r2(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + INLINE void nmc_xfc_se_c2_nr(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + // XFC C2 NF + + INLINE void nmc_xfc_se_c2_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" 
(a2)); + } + + INLINE void nmc_xfc_se_c2_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + INLINE void nmc_xfc_se_c2_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + INLINE void nmc_xfc_se_c2_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + asm volatile("xfc.se.c2.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2)); + } + + // XFC C4 + + INLINE void nmc_xfc_se_c4_r0(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r0" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_r1(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, 
uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r1" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_r2(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r2" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_nr(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.nr" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + // XFC C4 + + INLINE void nmc_xfc_se_c4_r0_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 
ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r0.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_r1_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r1.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_r2_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.r2.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + INLINE void nmc_xfc_se_c4_nr_nf(void *_daddr, void *_pc, uint64_t _cmdProps, + uint64_t _a1, uint64_t _a2, uint64_t _a3, uint64_t _a4) + { + REGISTER void *daddr ASMR("x10") = _daddr; + REGISTER void *pc ASMR("x11") = _pc; + REGISTER uint64_t cmdProps ASMR("x12") = _cmdProps; + REGISTER uint64_t a1 ASMR("x13") = _a1; + REGISTER uint64_t a2 ASMR("x14") = _a2; + REGISTER uint64_t a3 ASMR("x15") = _a3; + REGISTER uint64_t a4 ASMR("x16") = _a4; + asm volatile("xfc.se.c4.nr.nf" : : "r" (daddr), "r" (pc), "r" (cmdProps), "r" (a1), "r" (a2), "r" (a3), "r" (a4)); + } + + // XFJ + + INLINE void nmc_xfj_r0(int64_t *_cmdId) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER 
uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + } + + INLINE void nmc_xfj_r1(int64_t *_cmdId, uint64_t *_ret1) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + } + + INLINE void nmc_xfj_r2(int64_t *_cmdId, uint64_t *_ret1, uint64_t *_ret2) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + *_ret2 = ret2; + } + + INLINE void nmc_xfj_r0_ni(int64_t *_cmdId) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + } + + INLINE void nmc_xfj_r1_ni(int64_t *_cmdId, uint64_t *_ret1) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + } + + INLINE void nmc_xfj_r2_ni(int64_t *_cmdId, uint64_t *_ret1, uint64_t *_ret2) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + *_ret2 = ret2; + } + + INLINE void nmc_xfj_r0_nb(int64_t *_cmdId) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + } + + INLINE void nmc_xfj_r1_nb(int64_t *_cmdId, uint64_t *_ret1) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 
ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + } + + INLINE void nmc_xfj_r2_nb(int64_t *_cmdId, uint64_t *_ret1, uint64_t *_ret2) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + *_ret2 = ret2; + } + + INLINE void nmc_xfj_r0_nb_ni(int64_t *_cmdId) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + } + + INLINE void nmc_xfj_r1_nb_ni(int64_t *_cmdId, uint64_t *_ret1) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + } + + INLINE void nmc_xfj_r2_nb_ni(int64_t *_cmdId, uint64_t *_ret1, uint64_t *_ret2) { + REGISTER int64_t cmdId ASMR("x10"); + REGISTER uint64_t ret1 ASMR("x11"); + REGISTER uint64_t ret2 ASMR("x12"); + asm volatile("xfj.nb.ni" : "=r" (cmdId), "=r" (ret1), "=r" (ret2)); + *_cmdId = cmdId; + *_ret1 = ret1; + *_ret2 = ret2; + } + + INLINE int64_t nmc_xfja() { + REGISTER int64_t rtn ASMR("x10"); + asm volatile("xfja" : "=r" (rtn)); + return rtn; + } + + INLINE int64_t nmc_xfja_nb() { + REGISTER int64_t rtn ASMR("x10"); + asm volatile("xfja.nb" : "=r" (rtn)); + return rtn; + } + + INLINE int64_t nmc_xfja_ni() { + REGISTER int64_t rtn ASMR("x10"); + asm volatile("xfja.ni" : "=r" (rtn)); + return rtn; + } + + INLINE int64_t nmc_xfja_ni_nb() { + REGISTER int64_t rtn ASMR("x10"); + asm volatile("xfja.ni.nb" : "=r" (rtn)); + return rtn; + } + + // XTR + + INLINE void nmc_xtr_r0() { + asm volatile("xtr.r0"); + } + + INLINE void nmc_xtr_r1(uint64_t a1) { + asm 
volatile("xtr.r1 %0" : : "r" (a1)); + } + + INLINE void nmc_xtr_r2(uint64_t a1, uint64_t a2) { + asm volatile("xtr.r2 %0,%1" : : "r" (a1), "r" (a2)); + } + + INLINE double nmc_csr_rdtime() { + double dest; + asm("rdtime %0" : "=r" (dest)); + return dest; + } + + INLINE uint64_t nmc_csr_rdcycle() { + uint64_t dest; + asm("rdcycle %0" : "=r" (dest)); + return dest; + } + + // + // Event routines + // + INLINE void nmc_xem_sm(uint64_t eventNum) { + asm volatile("xem.sm %0" : : "r" (eventNum)); + } + INLINE void nmc_xem_cm(uint64_t eventNum, uint64_t eventCount) { + asm volatile("xem.cm %0,%1" : : "r" (eventNum), "r" (eventCount)); + } + INLINE void nmc_xem_bm(uint64_t eventNum, uint64_t recvChan) { + asm volatile("xem.bm %0,%1" : : "r" (eventNum), "r" (recvChan)); + } + + INLINE uint64_t nmc_xed(uint64_t eventNum) { + uint64_t dest; + asm("xed %0,%1" : "=r" (dest) : "r" (eventNum)); + return dest; + } + + INLINE void nmc_xes(uint64_t eventDest) { + asm volatile("xes %0" : : "r" (eventDest)); + } + INLINE void nmc_xes_nf(uint64_t _eventDest) { + REGISTER uint64_t eventDest = _eventDest; + asm volatile("xes.nf %0" : : "r" (eventDest)); + } + + INLINE void nmc_xes_d(uint64_t eventDest, uint64_t eventData) { + asm volatile("xes.d %0,%1" : : "r" (eventDest), "r" (eventData)); + } + INLINE void nmc_xes_d_nf(uint64_t eventDest, uint64_t eventData) { + asm volatile("xes.d.nf %0,%1" : : "r" (eventDest), "r" (eventData)); + } + + INLINE void nmc_xeb(uint64_t eventNum, uint64_t Chan) { + uint64_t eventDest = (Chan << 8) | eventNum; + asm volatile("xeb %0" : : "r" (eventDest)); + } + INLINE void nmc_xeb_nf(uint64_t eventNum, uint64_t Chan) { + uint64_t eventDest = (Chan << 8) | eventNum; + asm volatile("xeb.nf %0" : : "r" (eventDest)); + } + + INLINE void nmc_xeb_d(uint64_t eventNum, uint64_t Chan, uint64_t eventData) { + uint64_t eventDest = (Chan << 8) | eventNum; + asm volatile("xeb.d %0,%1" : : "r" (eventDest), "r" (eventData)); + } + INLINE void nmc_xeb_d_nf(uint64_t 
eventNum, uint64_t Chan, uint64_t eventData) { + uint64_t eventDest = (Chan << 8) | eventNum; + asm volatile("xeb.d.nf %0,%1" : : "r" (eventDest), "r" (eventData)); + } + + INLINE uint64_t nmc_xel(uint64_t eventMask) { + uint64_t rtnMask; + asm volatile("xel %0,%1" : "=r" (rtnMask) : "r" (eventMask)); + return rtnMask; + } + INLINE uint64_t nmc_xel_nb(uint64_t eventMask) { + uint64_t rtnMask; + asm volatile("xel.nb %0,%1" : "=r" (rtnMask) : "r" (eventMask)); + return rtnMask; + } + + INLINE void nmc_xer(uint64_t eventNum) { + asm volatile("xer %0" : : "r" (eventNum)); + } + INLINE void nmc_xer_ni(uint64_t eventNum) { + asm volatile("xer.ni %0" : : "r" (eventNum)); + } + + INLINE uint64_t nmc_xer_d(uint64_t eventNum) { + uint64_t evData; + asm volatile("xer.d %0,%1" : "=r" (evData) : "r" (eventNum)); + return evData; + } + INLINE uint64_t nmc_xer_d_ni(uint64_t eventNum) { + uint64_t evData; + asm volatile("xer.d.ni %0,%1" : "=r" (evData) : "r" (eventNum)); + return evData; + } + + // Atomic memory operations + + // AMOADD + + INLINE int32_t nmc_amoadd_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoadd.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoadd_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoadd.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoadd_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoadd.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoadd_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoadd.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoadd_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoadd.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void 
nmc_amoadd_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoadd.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoadd_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoadd.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoadd_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoadd.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amoadd_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoadd.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoadd_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoadd.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoadd_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoadd.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoadd_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoadd.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoadd_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoadd.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoadd_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoadd.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoadd_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoadd.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoadd_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoadd.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOXOR + + INLINE int32_t nmc_amoxor_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoxor.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } 
+ INLINE int32_t nmc_amoxor_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoxor.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoxor_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoxor.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoxor_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoxor.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoxor_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoxor.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoxor.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoxor.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoxor.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amoxor_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoxor.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoxor_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoxor.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoxor_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoxor.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoxor_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoxor.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void 
nmc_amoxor_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoxor.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoxor.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoxor.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoxor_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoxor.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOOR + + INLINE int32_t nmc_amoor_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoor.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoor_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoor.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoor_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoor.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoor_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoor.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoor_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoor.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoor.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoor.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoor.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amoor_d(volatile int64_t *addr, int64_t din) { 
+ REGISTER int64_t dout; + asm volatile ("amoor.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoor_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoor.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoor_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoor.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoor_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoor.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoor_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoor.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoor.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoor.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoor_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoor.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOAND + + INLINE int32_t nmc_amoand_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoand.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoand_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoand.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoand_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoand.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoand_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile 
("amoand.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoand_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoand.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoand.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoand.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoand.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amoand_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoand.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoand_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoand.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoand_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoand.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoand_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoand.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoand_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoand.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoand.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoand.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoand_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoand.d.nr.aqrl 
%1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOMIN + + INLINE int32_t nmc_amomin32(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomin.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomin_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomin.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomin_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomin.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomin_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomin.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomin_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomin.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomin.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomin.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomin.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amomin_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomin.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amomin_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomin.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amomin_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomin.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return 
dout; + } + INLINE int64_t nmc_amomin_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomin.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomin_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomin.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomin.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomin.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomin_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomin.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOMAX + + INLINE int32_t nmc_amomax_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomax.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomax_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomax.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomax_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomax.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amomax_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amomax.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomax_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomax.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomax_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomax.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomax_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomax.w.nr.rl %1,(%0)" : : "r" 
(addr), "r" (din)); + } + INLINE void nmc_amomax_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amomax.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amomax_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomax.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amomax_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomax.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amomax_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomax.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amomax_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amomax.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomax_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomax.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomax_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomax.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomax_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomax.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomax_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amomax.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOMINU + + INLINE uint32_t nmc_amominu_w(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amominu.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t nmc_amominu_w_aq(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amominu.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t 
nmc_amominu_w_rl(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amominu.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t nmc_amominu_w_aqrl(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amominu.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amominu_w_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amominu.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_w_aq_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amominu.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_w_rl_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amominu.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_w_aqrl_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amominu.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE uint64_t nmc_amominu_d(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amominu.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint64_t nmc_amominu_d_aq(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amominu.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint64_t nmc_amominu_d_rl(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amominu.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint64_t nmc_amominu_d_aqrl(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amominu.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amominu_d_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amominu.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_d_aq_nb(volatile uint64_t 
*addr, uint64_t din) { + asm volatile ("amominu.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_d_rl_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amominu.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amominu_d_aqrl_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amominu.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOMAXU + + INLINE uint32_t nmc_amomaxu_w(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amomaxu.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t nmc_amomaxu_w_aq(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amomaxu.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t nmc_amomaxu_w_rl(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amomaxu.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint32_t nmc_amomaxu_w_aqrl(volatile uint32_t *addr, uint32_t din) { + REGISTER uint32_t dout; + asm volatile ("amomaxu.w.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomaxu_w_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amomaxu.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_w_aq_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amomaxu.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_w_rl_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amomaxu.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_w_aqrl_nb(volatile uint32_t *addr, uint32_t din) { + asm volatile ("amomaxu.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE uint64_t nmc_amomaxu_d(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amomaxu.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" 
(din)); + return dout; + } + INLINE uint64_t nmc_amomaxu_d_aq(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amomaxu.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint64_t nmc_amomaxu_d_rl(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amomaxu.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE uint64_t nmc_amomaxu_d_aqrl(volatile uint64_t *addr, uint64_t din) { + REGISTER uint64_t dout; + asm volatile ("amomaxu.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amomaxu_d_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amomaxu.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_d_aq_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amomaxu.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_d_rl_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amomaxu.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amomaxu_d_aqrl_nb(volatile uint64_t *addr, uint64_t din) { + asm volatile ("amomaxu.d.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + // AMOSWAP + + INLINE int32_t nmc_amoswap_w(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoswap.w %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoswap_w_aq(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoswap.w.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoswap_w_rl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoswap.w.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int32_t nmc_amoswap_w_aqrl(volatile int32_t *addr, int32_t din) { + REGISTER int32_t dout; + asm volatile ("amoswap.w.aqrl %0,%2,(%1)" : "=r" 
(dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoswap_w_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoswap.w.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_w_aq_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoswap.w.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_w_rl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoswap.w.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_w_aqrl_nb(volatile int32_t *addr, int32_t din) { + asm volatile ("amoswap.w.nr.aqrl %1,(%0)" : : "r" (addr), "r" (din)); + } + + INLINE int64_t nmc_amoswap_d(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoswap.d %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoswap_d_aq(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoswap.d.aq %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoswap_d_rl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoswap.d.rl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + INLINE int64_t nmc_amoswap_d_aqrl(volatile int64_t *addr, int64_t din) { + REGISTER int64_t dout; + asm volatile ("amoswap.d.aqrl %0,%2,(%1)" : "=r" (dout) : "r" (addr), "r" (din)); + return dout; + } + + INLINE void nmc_amoswap_d_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoswap.d.nr %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_d_aq_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoswap.d.nr.aq %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_d_rl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoswap.d.nr.rl %1,(%0)" : : "r" (addr), "r" (din)); + } + INLINE void nmc_amoswap_d_aqrl_nb(volatile int64_t *addr, int64_t din) { + asm volatile ("amoswap.d.nr.aqrl %1,(%0)" : : "r" 
(addr), "r" (din)); + } + + // AMOCAS + + INLINE int32_t nmc_amocas_w(volatile int32_t *addr, int32_t din1, int32_t din2) { + REGISTER int32_t dout; + asm volatile ("amocas.w %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int32_t nmc_amocas_w_aq(volatile int32_t *addr, int32_t din1, int32_t din2) { + REGISTER int32_t dout; + asm volatile ("amocas.w.aq %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int32_t nmc_amocas_w_rl(volatile int32_t *addr, int32_t din1, int32_t din2) { + REGISTER int32_t dout; + asm volatile ("amocas.w.rl %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int32_t nmc_amocas_w_aqrl(volatile int32_t *addr, int32_t din1, int32_t din2) { + REGISTER int32_t dout; + asm volatile ("amocas.w.aqrl %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + + INLINE void nmc_amocas_w_nb(volatile int32_t *addr, int32_t din1, int32_t din2) { + asm volatile ("amocas.w.nr %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_w_aq_nb(volatile int32_t *addr, int32_t din1, int32_t din2) { + asm volatile ("amocas.w.nr.aq %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_w_rl_nb(volatile int32_t *addr, int32_t din1, int32_t din2) { + asm volatile ("amocas.w.nr.rl %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_w_aqrl_nb(volatile int32_t *addr, int32_t din1, int32_t din2) { + asm volatile ("amocas.w.nr.aqrl %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + + INLINE int64_t nmc_amocas_d(volatile int64_t *addr, int64_t din1, int64_t din2) { + REGISTER int64_t dout; + asm volatile ("amocas.d %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int64_t nmc_amocas_d_aq(volatile int64_t *addr, int64_t din1, int64_t din2) { + REGISTER int64_t dout; + asm 
volatile ("amocas.d.aq %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int64_t nmc_amocas_d_rl(volatile int64_t *addr, int64_t din1, int64_t din2) { + REGISTER int64_t dout; + asm volatile ("amocas.d.rl %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + INLINE int64_t nmc_amocas_d_aqrl(volatile int64_t *addr, int64_t din1, int64_t din2) { + REGISTER int64_t dout; + asm volatile ("amocas.d.aqrl %0,%2,%3,(%1)" : "=r" (dout) : "r" (addr), "r" (din1), "r" (din2)); + return dout; + } + + INLINE void nmc_amocas_d_nb(volatile int64_t *addr, int64_t din1, int64_t din2) { + asm volatile ("amocas.d.nr %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_d_aq_nb(volatile int64_t *addr, int64_t din1, int64_t din2) { + asm volatile ("amocas.d.nr.aq %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_d_rl_nb(volatile int64_t *addr, int64_t din1, int64_t din2) { + asm volatile ("amocas.d.nr.rl %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + INLINE void nmc_amocas_d_aqrl_nb(volatile int64_t *addr, int64_t din1, int64_t din2) { + asm volatile ("amocas.d.nr.aqrl %1,%2,(%0)" : : "r" (addr), "r" (din1), "r" (din2)); + } + + // AMOFADD + + INLINE float nmc_amofadd_s(volatile float *addr, float din) { + float dout; + asm volatile ("amofadd.s %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofadd_s_aq(volatile float *addr, float din) { + float dout; + asm volatile ("amofadd.s.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofadd_s_rl(volatile float *addr, float din) { + float dout; + asm volatile ("amofadd.s.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofadd_s_aqrl(volatile float *addr, float din) { + float dout; + asm volatile ("amofadd.s.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + 
return dout; + } + + INLINE void nmc_amofadd_s_nb(volatile float *addr, float din) { + asm volatile ("amofadd.s.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_s_aq_nb(volatile float *addr, float din) { + asm volatile ("amofadd.s.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_s_rl_nb(volatile float *addr, float din) { + asm volatile ("amofadd.s.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_s_aqrl_nb(volatile float *addr, float din) { + asm volatile ("amofadd.s.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + INLINE double nmc_amofadd_d(volatile double *addr, double din) { + double dout; + asm volatile ("amofadd.d %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofadd_d_aq(volatile double *addr, double din) { + double dout; + asm volatile ("amofadd.d.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofadd_d_rl(volatile double *addr, double din) { + double dout; + asm volatile ("amofadd.d.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofadd_d_aqrl(volatile double *addr, double din) { + double dout; + asm volatile ("amofadd.d.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + + INLINE void nmc_amofadd_d_nb(volatile double *addr, double din) { + asm volatile ("amofadd.d.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_d_aq_nb(volatile double *addr, double din) { + asm volatile ("amofadd.d.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_d_rl_nb(volatile double *addr, double din) { + asm volatile ("amofadd.d.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofadd_d_aqrl_nb(volatile double *addr, double din) { + asm volatile ("amofadd.d.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + // AMOFMIN + + INLINE float nmc_amofmin_s(volatile float *addr, float din) { + 
float dout; + asm volatile ("amofmin.s %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmin_s_aq(volatile float *addr, float din) { + float dout; + asm volatile ("amofmin.s.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmin_s_rl(volatile float *addr, float din) { + float dout; + asm volatile ("amofmin.s.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmin_s_aqrl(volatile float *addr, float din) { + float dout; + asm volatile ("amofmin.s.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + + INLINE void nmc_amofmin_s_nb(volatile float *addr, float din) { + asm volatile ("amofmin.s.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_s_aq_nb(volatile float *addr, float din) { + asm volatile ("amofmin.s.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_s_rl_nb(volatile float *addr, float din) { + asm volatile ("amofmin.s.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_s_aqrl_nb(volatile float *addr, float din) { + asm volatile ("amofmin.s.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + INLINE double nmc_amofmin_d(volatile double *addr, double din) { + double dout; + asm volatile ("amofmin.d %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmin_d_aq(volatile double *addr, double din) { + double dout; + asm volatile ("amofmin.d.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmin_d_rl(volatile double *addr, double din) { + double dout; + asm volatile ("amofmin.d.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmin_d_aqrl(volatile double *addr, double din) { + double dout; + asm volatile ("amofmin.d.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + + INLINE void 
nmc_amofmin_d_nb(volatile double *addr, double din) { + asm volatile ("amofmin.d.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_d_aq_nb(volatile double *addr, double din) { + asm volatile ("amofmin.d.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_d_rl_nb(volatile double *addr, double din) { + asm volatile ("amofmin.d.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmin_d_aqrl_nb(volatile double *addr, double din) { + asm volatile ("amofmin.d.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + // AMOFMAX + + INLINE float nmc_amofmax_s(volatile float *addr, float din) { + float dout; + asm volatile ("amofmax.s %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmax_s_aq(volatile float *addr, float din) { + float dout; + asm volatile ("amofmax.s.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmax_s_rl(volatile float *addr, float din) { + float dout; + asm volatile ("amofmax.s.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE float nmc_amofmax_s_aqrl(volatile float *addr, float din) { + float dout; + asm volatile ("amofmax.s.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + + INLINE void nmc_amofmax_s_nb(volatile float *addr, float din) { + asm volatile ("amofmax.s.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_s_aq_nb(volatile float *addr, float din) { + asm volatile ("amofmax.s.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_s_rl_nb(volatile float *addr, float din) { + asm volatile ("amofmax.s.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_s_aqrl_nb(volatile float *addr, float din) { + asm volatile ("amofmax.s.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + INLINE double nmc_amofmax_d(volatile double *addr, double din) { + double dout; + asm volatile ("amofmax.d 
%0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmax_d_aq(volatile double *addr, double din) { + double dout; + asm volatile ("amofmax.d.aq %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmax_d_rl(volatile double *addr, double din) { + double dout; + asm volatile ("amofmax.d.rl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + INLINE double nmc_amofmax_d_aqrl(volatile double *addr, double din) { + double dout; + asm volatile ("amofmax.d.aqrl %0,%2,(%1)" : "=f" (dout) : "r" (addr), "f" (din)); + return dout; + } + + INLINE void nmc_amofmax_d_nb(volatile double *addr, double din) { + asm volatile ("amofmax.d.nr %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_d_aq_nb(volatile double *addr, double din) { + asm volatile ("amofmax.d.nr.aq %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_d_rl_nb(volatile double *addr, double din) { + asm volatile ("amofmax.d.nr.rl %1,(%0)" : : "r" (addr), "f" (din)); + } + INLINE void nmc_amofmax_d_aqrl_nb(volatile double *addr, double din) { + asm volatile ("amofmax.d.nr.aqrl %1,(%0)" : : "r" (addr), "f" (din)); + } + + // Load and store + + INLINE void nmc_sb_nt(volatile uint8_t *_addr, uint8_t _din) { + asm volatile ("sb.nt %0,0(%1)" : : "r" (_din), "r" (_addr)); + } + INLINE void nmc_sh_nt(volatile uint16_t *_addr, uint16_t _din) { + asm volatile ("sh.nt %0,0(%1)" : : "r" (_din), "r" (_addr)); + } + INLINE void nmc_sw_nt(volatile uint32_t *_addr, uint32_t _din) { + asm volatile ("sw.nt %0,0(%1)" : : "r" (_din), "r" (_addr)); + } + INLINE void nmc_sd_nt(volatile uint64_t *_addr, int64_t _din) { + asm volatile ("sd.nt %0,0(%1)" : : "r" (_din), "r" (_addr)); + } +#if defined(__GNUC__) && !defined(__llvm__) + INLINE void nmc_fsw_nt(volatile float *addr, float din) { + asm volatile ("fsw.nt %0,0(%1)" : : "f" (din), "r" (addr)); + } + INLINE void nmc_fsd_nt(volatile double *addr, double 
din) { + asm volatile ("fsd.nt %0,0(%1)" : : "f" (din), "r" (addr)); + } +#else + extern void nmc_fstore32_nt(volatile float *addr, float din); + extern void nmc_fstore64_nt(volatile double *addr, double din); +#endif + + INLINE int8_t nmc_lb_nt(volatile int8_t *_addr) { + int8_t dout; + asm volatile ("lb.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int16_t nmc_lh_nt(volatile int16_t *_addr) { + int16_t dout; + asm volatile ("lh.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int32_t nmc_lw_nt(volatile int32_t *_addr) { + int32_t dout; + asm volatile ("lw.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int64_t nmc_ld_nt(volatile int64_t *_addr) { + int64_t dout; + asm volatile ("ld.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint8_t nmc_lbu_nt(volatile uint8_t *_addr) { + uint8_t dout; + asm volatile ("lbu.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint16_t nmc_lhu_nt(volatile uint16_t *_addr) { + uint16_t dout; + asm volatile ("lhu.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint32_t nmc_lwu_nt(volatile uint32_t *_addr) { + uint32_t dout; + asm volatile ("lwu.nt %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } +#if defined(__GNUC__) && !defined(__llvm__) + INLINE float nmc_flw_nt(volatile float *_addr) { + float dout; + asm volatile ("flw.nt %0,0(%1)" : "=f" (dout) : "r" (_addr)); + return dout; + } + INLINE double nmc_fld_nt(volatile double *_addr) { + double dout; + asm volatile ("fld.nt %0,0(%1)" : "=f" (dout) : "r" (_addr)); + return dout; + } +#else + extern float nmc_flw_nt(volatile float *_addr); + extern double nmc_fld_nt(volatile double *_addr); +#endif + + // Cache line loads + + + INLINE int8_t nmc_lb_sp(volatile int8_t *_addr) { + int8_t dout; + asm volatile ("lb.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int16_t nmc_lh_sp(volatile int16_t *_addr) { + int16_t 
dout; + asm volatile ("lh.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int32_t nmc_lw_sp(volatile int32_t *_addr) { + int32_t dout; + asm volatile ("lw.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE int64_t nmc_ld_sp(volatile int64_t *_addr) { + int64_t dout; + asm volatile ("ld.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint8_t nmc_lbu_sp(volatile uint8_t *_addr) { + uint8_t dout; + asm volatile ("lbu.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint16_t nmc_lhu_sp(volatile uint16_t *_addr) { + uint16_t dout; + asm volatile ("lhu.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } + INLINE uint32_t nmc_lwu_sp(volatile uint32_t *_addr) { + uint32_t dout; + asm volatile ("lwu.sp %0,0(%1)" : "=r" (dout) : "r" (_addr)); + return dout; + } +#if defined(__GNUC__) && !defined(__llvm__) + INLINE float nmc_flw_sp(volatile float *_addr) { + float dout; + asm volatile ("flw.sp %0,0(%1)" : "=f" (dout) : "r" (_addr)); + return dout; + } + INLINE double nmc_fld_sp(volatile double *_addr) { + double dout; + asm volatile ("fld.sp %0,0(%1)" : "=f" (dout) : "r" (_addr)); + return dout; + } +#else + extern float nmc_flw_sp(volatile float *_addr); + extern double nmc_fld_sp(volatile double *_addr); +#endif + + // ID + + INLINE uint64_t nmc_xid_dev(void) { + REGISTER uint64_t nmcId ASMR("x10"); + asm("xid.dev %0" : "=r" (nmcId) : ); + return nmcId; + } + + INLINE uint64_t nmc_xid_te(void) { + REGISTER uint64_t nmcTe ASMR("x10"); + asm("xid.te %0" : "=r" (nmcTe) : ); + return nmcTe; + } + + INLINE uint64_t nmc_xid_core(void) { + REGISTER uint64_t nmcCore ASMR("x10"); + asm("xid.core %0" : "=r" (nmcCore) : ); + return nmcCore; + } + + INLINE uint64_t nmc_xid_thrd(void) { + REGISTER uint64_t nmcThread ASMR("x10"); + asm("xid.thrd %0" : "=r" (nmcThread) : ); + return nmcThread; + } + + // tzc(out, in) + INLINE uint64_t nmc_tzc(uint64_t __in) { + uint64_t __out; + asm("tzc 
%0,%1" : "=r" (__out) : "r" (__in)); + return __out; + } + + // Lower thread priority + INLINE void nmc_xlp(void) { + asm volatile("xlp"); + } + + // Return thread priority to normal + INLINE void nmc_xnp(void) { + asm volatile("xnp"); + } + +#ifdef __cplusplus +} +#endif + +#endif // ENABLE_EMULATION \ No newline at end of file diff --git a/src/micron/nmc_types.h b/src/micron/nmc_types.h new file mode 100644 index 00000000..ae46a55f --- /dev/null +++ b/src/micron/nmc_types.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2024 Micron Technology, Inc. + * + * This file is the confidential and proprietary property of + * Micron Technology, Inc. + */ + +#pragma once + +#include <inttypes.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This file contains all of the typedef and enum definitions used + * by NMC API header files for both host and RISC-V APIs. + */ + +typedef struct NmcApi* nmc_t; +typedef struct NmcLockApi* nmc_lock_t; +typedef struct NmcMutexApi* nmc_mutex_t; +typedef struct NmcBarrierApi* nmc_barrier_t; + +typedef uint16_t nmc_status_t; +typedef uint64_t nmc_event_t; +typedef uint16_t nmc_cid_t; + +typedef struct NmcHucResponseApi* nmc_response_t; +typedef struct NmcAttachPropsApi* nmc_attach_props_t; +typedef struct NmcCmdPropsApi* nmc_cmd_props_t; + +typedef int32_t nmc_remote_host_t; + +#define NMC_CMD_BITS 12 + +enum __nmc_cmd { + NmcCmdInvalid = 0x000, + NmcCmdRead = 0x001, + NmcCmdWrite = 0x002, + NmcCmdClean = 0x003, + NmcCmdFlush = 0x004, + NmcCmdMscTagRead = 0x005, + NmcCmdAmo = 0x006, + NmcCmdAmoNr = 0x007, + NmcCmdSyscall = 0x008, + NmcCmdTrap = 0x009, + NmcCmdEventDestination = 0x00A, + NmcCmdEventMode = 0x00B, + NmcCmdEventSend = 0x00C, + NmcCmdEventBroadcast = 0x00D, + NmcCmdEventReceive = 0x00E, + NmcCmdTeCall = 0x00F, + NmcCmdTeReturn = 0x010, + NmcCmdWrReq = 0x011, + NmcCmdHusRsp = 0x012, + NmcCmdDmCopy = 0x013, + NmcCmdDmGatherStride = 0x014, + NmcCmdDmGatherAddress = 0x015, + NmcCmdDmGatherIndex = 0x016, + NmcCmdDmScatterStride
= 0x017, + NmcCmdDmScatterAddress = 0x018, + NmcCmdDmScatterIndex = 0x019, + NmcCmdDmSet = 0x01A, + NmcCmdDmReturn = 0x01B, + NmcCmdSeCall = 0x01C, + NmcCmdSeReturn = 0x01D, + NmcCmdEventAck = 0x01E, + NmcCmdAmoAdd32 = 0x0A0, + NmcCmdAmoAnd32 = 0x0A1, + NmcCmdAmoXor32 = 0x0A2, + NmcCmdAmoOr32 = 0x0A3, + NmcCmdAmoMin32 = 0x0A4, + NmcCmdAmoMax32 = 0x0A5, + NmcCmdAmoMinU32 = 0x0A6, + NmcCmdAmoMaxU32 = 0x0A7, + NmcCmdAmoSwap32 = 0x0A8, + NmcCmdAmoCas32 = 0x0A9, + NmcCmdAmoFadd32 = 0x0AA, + NmcCmdAmoFmin32 = 0x0AB, + NmcCmdAmoFmax32 = 0x0AC, + NmcCmdAmoAdd64 = 0x0B0, + NmcCmdAmoAnd64 = 0x0B1, + NmcCmdAmoXor64 = 0x0B2, + NmcCmdAmoOr64 = 0x0B3, + NmcCmdAmoMin64 = 0x0B4, + NmcCmdAmoMax64 = 0x0B5, + NmcCmdAmoMinU64 = 0x0B6, + NmcCmdAmoMaxU64 = 0x0B7, + NmcCmdAmoSwap64 = 0x0B8, + NmcCmdAmoCas64 = 0x0B9, + NmcCmdAmoFadd64 = 0x0BA, + NmcCmdAmoFmin64 = 0x0BB, + NmcCmdAmoFmax64 = 0x0BC, + NmcCmdLoopBack = 0xffd, + NmcCmdAtomic = 0xffe, + NmcCmdCreateThread = 0xfff +}; + +typedef enum __nmc_cmd ENmcCmd; +typedef enum __nmc_cmd nmc_cmd_t; + +/* + * Resource Management + */ + +/* + * Stack protection modes for threads/fibers are: + * AllowThreadWriteAll No write protection for stack + * AllowFiberWriteMaster Allow fibers to have write access to + * its parent stack. + * AllowThreadWriteOwn Only current thread can write to its + * stack.
+ */ +enum __nmc_stack_check_mode { /* stack write-protection policy; each value is described in the comment directly above */ + AllowThreadWriteAll = 0, + AllowFiberWriteMaster = 1, + AllowThreadWriteOwn = 2 +}; + +typedef enum __nmc_stack_check_mode EStackCheckMode; /* alias of enum __nmc_stack_check_mode */ +typedef enum __nmc_stack_check_mode nmc_stack_check_mode_t; /* second alias of the same enum */ + +enum __nmc_event_mode { /* event modes: simple, broadcast, and three collective variants (simple, reduce, cascade) */ + NmcEventSimpleMode = 0, + NmcEventBroadcastMode = 1, + NmcEventCollectiveSimpleMode = 2, + NmcEventCollectiveReduceMode = 3, + NmcEventCollectiveCascadeMode = 4, +}; + +typedef enum __nmc_event_mode ENmcEventMode; +typedef enum __nmc_event_mode nmc_event_mode_t; + +enum __nmc_event_reduce_op_type { /* reduction operator selector; Fxxx presumably floating-point and Uxxx presumably unsigned, by analogy with the NmcCmdAmo names -- TODO confirm */ + // Used only in CollectReduce (presumably NmcEventCollectiveReduceMode -- TODO confirm) + Fadd = 0, + Add = 1, + Fmin = 2, + Min = 3, + Umin = 4, + Fmax = 5, + Max = 6, + Umax = 7, + And = 8, + Or = 9, + Xor = 10, +}; + +typedef enum __nmc_event_reduce_op_type ENmcEventReduceOpType; +typedef enum __nmc_event_reduce_op_type nmc_event_reduce_op_type_t; + +enum __nmc_event_reduce_op_size { /* operand size selector for a reduce operation; presumably 4-byte vs 8-byte operands -- TODO confirm */ + // Used only in CollectReduce (presumably NmcEventCollectiveReduceMode -- TODO confirm) + FourBytes = 0, + EightBytes = 1, +}; + +typedef enum __nmc_event_reduce_op_size ENmcEventReduceOpSize; +typedef enum __nmc_event_reduce_op_size nmc_event_reduce_op_size_t; + +#ifdef __cplusplus +} /* closes the extern "C" block opened near the top of this header */ +#endif + -- GitLab