编译器选择与优化标志
# Compiler selection and optimization flags.
#
# Default to GCC only when the user has not already chosen a compiler with
# -DCMAKE_<LANG>_COMPILER=... or the CC/CXX environment variables. These
# defaults are only honored when this code runs before the first project() or
# enable_language() call; afterwards the compiler variables are already
# defined and the defaults are skipped.
if(NOT DEFINED CMAKE_C_COMPILER AND "$ENV{CC}" STREQUAL "")
  set(CMAKE_C_COMPILER gcc)
endif()
if(NOT DEFINED CMAKE_CXX_COMPILER AND "$ENV{CXX}" STREQUAL "")
  set(CMAKE_CXX_COMPILER g++)
endif()

# Default to a Release build, but only for single-config generators:
# multi-config generators (Visual Studio, Xcode, Ninja Multi-Config) ignore
# CMAKE_BUILD_TYPE. The value is cached so it shows up in cmake-gui/ccmake.
get_property(hpc_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT hpc_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
endif()

# Release flags. -DNDEBUG is kept explicitly: replacing the whole variable
# would otherwise drop it and leave assert() enabled in Release builds.
# -march=native ties the binary to the CPU of the build host.
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -mtune=native")
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -mtune=native")
多线程构建
# Parallel builds.
#
# The job count is chosen when the build tool is invoked, not while CMake
# configures. CMAKE_BUILD_PARALLEL_LEVEL is an *environment* variable read by
# `cmake --build`; a CMake variable of the same name set in a CMakeLists.txt
# is not consulted. So this block only detects the core count (left in N for
# later use) and prints the command line that actually enables parallelism.
include(ProcessorCount)
ProcessorCount(N)
if(NOT N EQUAL 0)  # ProcessorCount() yields 0 when it cannot determine the count
  message(STATUS
    "Detected ${N} processors; build in parallel with: cmake --build . --parallel ${N}")
endif()
向量化指令支持
# Vector instruction support: probe for AVX / AVX2 / AVX-512 and enable the
# widest instruction set the compiler accepts.
#
# check_cxx_compiler_flag() only proves that the *compiler* accepts a flag; it
# does not check the CPU that will run the binary. Enabling an instruction set
# the target machine lacks produces illegal-instruction crashes at run time.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mavx" COMPILER_SUPPORTS_AVX)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)
check_cxx_compiler_flag("-mavx512f" COMPILER_SUPPORTS_AVX512)
# Every flag that gets added is probed, including -mavx512cd.
check_cxx_compiler_flag("-mavx512cd" COMPILER_SUPPORTS_AVX512CD)

if(COMPILER_SUPPORTS_AVX512)
  add_compile_options(-mavx512f)
  if(COMPILER_SUPPORTS_AVX512CD)
    add_compile_options(-mavx512cd)
  endif()
elseif(COMPILER_SUPPORTS_AVX2)
  add_compile_options(-mavx2)
elseif(COMPILER_SUPPORTS_AVX)
  add_compile_options(-mavx)
endif()
OpenMP支持
# OpenMP support.
#
# REQUIRED aborts configuration when OpenMP is missing, so no OpenMP_FOUND
# guard is needed afterwards. Only the CXX component is requested because only
# OpenMP::OpenMP_CXX is linked. USE_OPENMP is attached to the target (PUBLIC,
# like the link dependency) instead of being defined for the whole directory.
find_package(OpenMP REQUIRED COMPONENTS CXX)
target_compile_definitions(your_target PUBLIC USE_OPENMP)
target_link_libraries(your_target PUBLIC OpenMP::OpenMP_CXX)
MPI支持
# MPI support.
#
# Link the MPI::MPI_CXX imported target rather than the legacy
# MPI_INCLUDE_PATH / MPI_CXX_LIBRARIES variables: the target carries the
# include directories, compile options/definitions and link flags together and
# applies them to your_target only, not to every target in the directory.
# REQUIRED already aborts configuration if MPI is missing, so no MPI_FOUND
# guard is needed.
find_package(MPI REQUIRED COMPONENTS CXX)
target_link_libraries(your_target PUBLIC MPI::MPI_CXX)
链接时优化(LTO)
# Link-time optimization (LTO / IPO).
#
# check_ipo_supported() stores the verdict in `result` and, on failure, the
# reason in `output`. IPO is switched on for targets created afterwards only
# when the toolchain supports it; otherwise the reason is reported instead of
# being silently discarded.
include(CheckIPOSupported)
check_ipo_supported(RESULT result OUTPUT output)
if(result)
  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
  message(STATUS "IPO/LTO is not supported and stays disabled: ${output}")
endif()
特定架构优化
# Architecture-specific tuning for AMD EPYC or Intel Xeon hosts.
#
# The CPU model handed to -mtune is a cache variable, so it can be chosen at
# configure time without editing this file, e.g.
#   cmake -DHPC_TUNE_CPU=skylake-avx512 ..   # Intel Xeon
# The default, znver2 (AMD EPYC), matches the previously hard-coded value.
# Setting it to an empty string disables the flag.
set(HPC_TUNE_CPU "znver2" CACHE STRING
  "CPU model passed to -mtune on x86_64 (e.g. znver2, skylake-avx512); empty disables")
if(HPC_TUNE_CPU AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
  add_compile_options("-mtune=${HPC_TUNE_CPU}")
endif()
内存对齐控制
# Memory alignment control: request alignment suitable for SIMD instructions.
#
# Both flags are GCC-style x86 options, so each one is probed and only added
# when the active C++ compiler accepts it; an unsupported flag would otherwise
# break every compile in this directory.
# NOTE(review): per the GCC manual, -malign-double changes struct layout on
# 32-bit x86 and is not binary compatible with code built without it, and
# -mpreferred-stack-boundary=8 requests a 2^8 = 256-byte stack alignment.
# Confirm both are really wanted for this project.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-malign-double" COMPILER_SUPPORTS_MALIGN_DOUBLE)
if(COMPILER_SUPPORTS_MALIGN_DOUBLE)
  add_compile_options(-malign-double)
endif()
check_cxx_compiler_flag("-mpreferred-stack-boundary=8"
  COMPILER_SUPPORTS_PREFERRED_STACK_BOUNDARY_8)
if(COMPILER_SUPPORTS_PREFERRED_STACK_BOUNDARY_8)
  add_compile_options(-mpreferred-stack-boundary=8)
endif()
性能分析工具支持
# Profiling support: when ENABLE_PROFILING is switched on, -pg is passed to
# both the compile step and the link step of targets in this directory.
# The generator expressions expand to nothing while the option is OFF.
option(ENABLE_PROFILING "Enable profiling support" OFF)
add_compile_options("$<$<BOOL:${ENABLE_PROFILING}>:-pg>")
add_link_options("$<$<BOOL:${ENABLE_PROFILING}>:-pg>")
GNU gprof支持
# GNU gprof support: when ENABLE_GPROF is switched on, append -pg to the C and
# C++ compile flags and to the executable linker flags.
option(ENABLE_GPROF "Enable gprof profiling" OFF)
if(ENABLE_GPROF)
  foreach(gprof_flags_var IN ITEMS
      CMAKE_C_FLAGS
      CMAKE_CXX_FLAGS
      CMAKE_EXE_LINKER_FLAGS)
    string(APPEND ${gprof_flags_var} " -pg")
  endforeach()
endif()
BLAS/LAPACK支持
# BLAS / LAPACK support.
#
# REQUIRED aborts configuration when either package is missing, so no *_FOUND
# guard is needed. LAPACK depends on BLAS, so LAPACK is listed first: with
# static libraries the linker resolves symbols left to right. The imported
# targets (provided by newer CMake releases) are preferred because they also
# carry the required linker flags; older releases fall back to the variables.
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)
if(TARGET LAPACK::LAPACK AND TARGET BLAS::BLAS)
  target_link_libraries(your_target PUBLIC LAPACK::LAPACK BLAS::BLAS)
else()
  target_link_libraries(your_target PUBLIC ${LAPACK_LIBRARIES} ${BLAS_LIBRARIES})
endif()
FFTW支持
# FFTW support.
#
# CMake does not ship a FindFFTW module, so find_package(FFTW REQUIRED) only
# succeeds when the project supplies its own FindFFTW.cmake on
# CMAKE_MODULE_PATH. Try that first (quietly) and keep using its
# FFTW::FFTW3 target when it exists; otherwise locate the double-precision
# library through the fftw3.pc pkg-config file that FFTW installs.
find_package(FFTW QUIET)
if(TARGET FFTW::FFTW3)
  target_link_libraries(your_target PUBLIC FFTW::FFTW3)
else()
  find_package(PkgConfig REQUIRED)
  pkg_check_modules(FFTW3 REQUIRED IMPORTED_TARGET fftw3)
  target_link_libraries(your_target PUBLIC PkgConfig::FFTW3)
endif()
CUDA支持
# CUDA support.
#
# The CUDA::cudart and CUDA::cublas imported targets are defined by the
# FindCUDAToolkit module (CMake 3.17+), not by the deprecated FindCUDA module,
# so CUDAToolkit is the package that must be found.
#
# CMAKE_CUDA_ARCHITECTURES is given its default before the language is
# enabled, and only when the user has not already chosen a value
# (-DCMAKE_CUDA_ARCHITECTURES=...). Adjust the list to match your GPUs.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES "70;75;80")
endif()
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
target_link_libraries(your_target PUBLIC CUDA::cudart CUDA::cublas)
Ninja构建系统
# Ninja build system. Select it at configure time with: cmake -G Ninja ..
# A warning is emitted whenever the active generator's name does not contain
# "Ninja".
if(NOT "${CMAKE_GENERATOR}" MATCHES "Ninja")
  message(WARNING "Consider using Ninja generator for faster builds: cmake -G Ninja ..")
endif()
CCache支持
# ccache support: when a ccache executable is found, use it as the compiler
# launcher for both C and C++.
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  foreach(ccache_lang IN ITEMS C CXX)
    set(CMAKE_${ccache_lang}_COMPILER_LAUNCHER ${CCACHE_PROGRAM})
  endforeach()
endif()
构建缓存
# Build cache directory.
# Usage: cmake -DCMAKE_BUILD_CACHE_DIR=/path/to/cache ..
# When CMAKE_BUILD_CACHE_DIR holds a true value, copy it into
# CMAKE_REBUILD_CACHE.
# NOTE(review): nothing in the visible source reads CMAKE_REBUILD_CACHE, and
# neither name appears to be a variable CMake itself documents - as written
# this block looks like a no-op. Confirm the intended mechanism (for example
# pointing ccache at the directory through its CCACHE_DIR setting).
if(CMAKE_BUILD_CACHE_DIR)
set(CMAKE_REBUILD_CACHE ${CMAKE_BUILD_CACHE_DIR})
endif()
cmake_minimum_required(VERSION 3.12)
project(HighPerformanceComputingApp LANGUAGES CXX)

# Basic configuration: ISO C++17, required, without compiler extensions.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

# Default to a Release build without overriding an explicit
# -DCMAKE_BUILD_TYPE=... Multi-config generators (Visual Studio, Xcode,
# Ninja Multi-Config) ignore CMAKE_BUILD_TYPE and are left alone.
get_property(hpc_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT hpc_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
endif()

# -march=native ties the binary to the CPU of the build host; turn this off
# when the executable must run on other machines.
option(HPC_NATIVE_ARCH "Optimize for the build host CPU (-march=native)" ON)

# Vectorization support. This only verifies that the compiler accepts
# -mavx2, not that the CPU running the binary implements AVX2.
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-mavx2" COMPILER_SUPPORTS_AVX2)

# Parallel computing support.
find_package(OpenMP REQUIRED)
find_package(MPI REQUIRED)

# High-performance numerical libraries.
find_package(BLAS REQUIRED)
find_package(LAPACK REQUIRED)

# Main target.
add_executable(hpc_app main.cpp)

# Optimization flags are attached to the target and added on top of CMake's
# default Release flags, so the default -DNDEBUG stays in effect.
# NOTE: -ffast-math relaxes IEEE 754 floating-point semantics.
target_compile_options(hpc_app PRIVATE
  "$<$<CONFIG:Release>:-O3;-ffast-math;-funroll-loops>"
  "$<$<AND:$<CONFIG:Release>,$<BOOL:${HPC_NATIVE_ARCH}>>:-march=native;-mtune=native>"
  "$<$<BOOL:${COMPILER_SUPPORTS_AVX2}>:-mavx2>"
)

# Link libraries. MPI::MPI_CXX carries the MPI include directories and flags
# as well as the libraries. LAPACK precedes BLAS because LAPACK depends on
# BLAS and static linking resolves symbols left to right.
target_link_libraries(hpc_app
  PRIVATE
    OpenMP::OpenMP_CXX
    MPI::MPI_CXX
    ${LAPACK_LIBRARIES}
    ${BLAS_LIBRARIES}
)

# Installation.
install(TARGETS hpc_app DESTINATION bin)
这些配置技巧可以根据你的具体应用需求进行调整和组合,以获得最佳性能表现。