From b86bd6947c7e556753de4b937d256a2cbf91076b Mon Sep 17 00:00:00 2001 From: Markus Schwarz <markus.schwarz@kit.edu> Date: Fri, 14 Feb 2025 16:29:22 +0100 Subject: [PATCH] fix for --optimize compile option and ARM chips The --optimize compile option added some x86-specific instruction-set flags. These lead to errors when an ARM-based CPU is used. This fix checks whether platform.machine() reports an ARM CPU and, if so, skips the x86-specific flags. --- blond/compile.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/blond/compile.py b/blond/compile.py index 63162b08..822f011a 100644 --- a/blond/compile.py +++ b/blond/compile.py @@ -22,6 +22,7 @@ from __future__ import print_function import argparse import ctypes import os +import platform import subprocess import sys @@ -211,21 +212,23 @@ def compile_cpp_library(args, cflags, float_flags, libs, cpp_files): # Format the output list stdout = ret.stdout.replace('#define ', '').replace( '__ 1', '').replace('__', '').split('\n') - # Add the appropriate vectorization flag (not use avx512) - if 'AVX2' in stdout: - cflags += ['-mavx2'] - elif 'AVX' in stdout: - cflags += ['-mavx'] - elif 'SSE4_2' in stdout or 'SSE4_1' in stdout: - cflags += ['-msse4'] - elif 'SSE3' in stdout: - cflags += ['-msse3'] - else: - cflags += ['-msse'] - - # Add FMA if supported - if 'FMA' in stdout: - cflags += ['-mfma'] + # following options exist only on x86 processors + if 'arm' not in platform.machine(): + # Add the appropriate vectorization flag (not use avx512) + if 'AVX2' in stdout: + cflags += ['-mavx2'] + elif 'AVX' in stdout: + cflags += ['-mavx'] + elif 'SSE4_2' in stdout or 'SSE4_1' in stdout: + cflags += ['-msse4'] + elif 'SSE3' in stdout: + cflags += ['-msse3'] + else: + cflags += ['-msse'] + + # Add FMA if supported + if 'FMA' in stdout: + cflags += ['-mfma'] root, ext = os.path.splitext(args['libname']) if not ext: @@ -312,10 +315,9 @@ def compile_cuda_library(args, nvccflags, float_flags, cuda_files, 
nvcc): # Compile the GPU library # print('\n' + ''.join(['='] * 80)) print('\nCompiling the CUDA library') - import cupy as cp - if args['gpu'] == 'discover': print('Discovering the device compute capability..') + import cupy as cp dev = cp.cuda.Device(0) dev_name = cp.cuda.runtime.getDeviceProperties(dev)['name'] -- GitLab