diff --git a/Makefile b/Makefile
index 7ef5bdf35..b4b73d53f 100644
--- a/Makefile
+++ b/Makefile
@@ -4,24 +4,50 @@ LDFLAGS =
 LDLIBS = -lm
 INCLUDES =
 CFLAGS_COND = -march=native
-
-# Find nvcc
-NVCC := $(shell which nvcc 2>/dev/null)
+# Recursively expanded, so uname is never run when the Windows branch below
+# replaces the value with a simply expanded one.
+SHELL_UNAME = $(shell uname)
+# Command used by the clean target, and the flag that names a C build's output
+REMOVE_FILES = rm -f
+OUTPUT_FILE = -o $@
 
 # NVCC flags
 NVCC_FLAGS = -O3 --use_fast_math
 NVCC_LDFLAGS = -lcublas -lcublasLt
 
-# Function to test if the compiler accepts a given flag.
-define check_and_add_flag
+ifneq ($(OS), Windows_NT)
+  NVCC := $(shell which nvcc 2>/dev/null)
+
+  # Function to test if the compiler accepts a given flag.
+  define check_and_add_flag
     $(eval FLAG_SUPPORTED := $(shell printf "int main() { return 0; }\n" | $(CC) $(1) -x c - -o /dev/null 2>/dev/null && echo 'yes'))
     ifeq ($(FLAG_SUPPORTED),yes)
         CFLAGS += $(1)
     endif
-endef
+  endef
 
-# Check each flag and add it if supported
-$(foreach flag,$(CFLAGS_COND),$(eval $(call check_and_add_flag,$(flag))))
+  # Check each flag and add it if supported
+  $(foreach flag,$(CFLAGS_COND),$(eval $(call check_and_add_flag,$(flag))))
+else
+  # Windows build: MSVC (cl) compiler and cmd.exe commands
+  CFLAGS :=
+  REMOVE_FILES = del
+  SHELL_UNAME := Windows
+  # where prints full paths (possibly several, possibly containing spaces),
+  # so use it only to detect nvcc and then invoke the one found on PATH.
+  NVCC := $(if $(shell where nvcc 2> nul),nvcc)
+  CC := cl
+  CFLAGS = /Idev /Zi /nologo /Wall /WX- /diagnostics:column /sdl /O2 /Oi /Ot /GL /D _DEBUG /D _CONSOLE /D _UNICODE /D UNICODE /Gm- /EHsc /MD /GS /Gy /fp:fast /Zc:wchar_t /Zc:forScope /Zc:inline /permissive- \
+    /external:W3 /Gd /TP /wd4996 /FC /openmp:llvm
+  LDFLAGS :=
+  LDLIBS :=
+  INCLUDES :=
+  NVCC_FLAGS += -I"dev"
+  # Give the CUDA binary the target's name; move /Y (unlike rename) replaces
+  # a target left behind by a previous build instead of failing.
+  WIN_CUDA_RENAME = move /Y $@.exe $@
+  OUTPUT_FILE = /link /OUT:$@
+endif
 
 # Check if OpenMP is available
 # This is done by attempting to compile an empty file with OpenMP flags
@@ -34,7 +60,7 @@ ifeq ($(NO_OMP), 1)
   $(info OpenMP is manually disabled)
 else
   # Detect if running on macOS or Linux
-  ifeq ($(shell uname), Darwin)
+  ifeq ($(SHELL_UNAME), Darwin)
     # Check for Homebrew's libomp installation in different common directories
     ifeq ($(shell [ -d /opt/homebrew/opt/libomp/lib ] && echo "exists"), exists)
       # macOS with Homebrew on ARM (Apple Silicon)
@@ -54,18 +80,20 @@ else
       $(warning OpenMP not found, skipping OpenMP support)
     endif
   else
-    # Check for OpenMP support in GCC or Clang on Linux
-    ifeq ($(shell echo | $(CC) -fopenmp -x c -E - > /dev/null 2>&1; echo $$?), 0)
-      CFLAGS += -fopenmp -DOMP
-      LDLIBS += -lgomp
-      $(info OpenMP found, compiling with OpenMP support)
-    else
-      $(warning OpenMP not found, skipping OpenMP support)
+    ifneq ($(OS), Windows_NT)
+      # Check for OpenMP support in GCC or Clang on Linux
+      ifeq ($(shell echo | $(CC) -fopenmp -x c -E - > /dev/null 2>&1; echo $$?), 0)
+        CFLAGS += -fopenmp -DOMP
+        LDLIBS += -lgomp
+        $(info OpenMP found, compiling with OpenMP support)
+      else
+        $(warning OpenMP not found, skipping OpenMP support)
+      endif
     endif
   endif
 endif
 
-# PHONY means these targets will always be executed
+# Force the rebuild of targets
-.PHONY: all train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu
+.PHONY: all clean train_gpt2 test_gpt2 train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu profile_gpt2cu
 
 # Add targets
@@ -76,31 +104,36 @@ ifeq ($(NVCC),)
     $(info nvcc not found, skipping CUDA builds)
 else
     $(info nvcc found, including CUDA builds)
-    TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu
+    TARGETS += train_gpt2cu test_gpt2cu train_gpt2fp32cu test_gpt2fp32cu profile_gpt2cu
 endif
 
 all: $(TARGETS)
 
 train_gpt2: train_gpt2.c
-	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) -o $@
+	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
 
 test_gpt2: test_gpt2.c
-	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) -o $@
+	$(CC) $(CFLAGS) $(INCLUDES) $(LDFLAGS) $< $(LDLIBS) $(OUTPUT_FILE)
 
 train_gpt2cu: train_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
+	$(WIN_CUDA_RENAME)
 
 train_gpt2fp32cu: train_gpt2_fp32.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
+	$(WIN_CUDA_RENAME)
 
 test_gpt2cu: test_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
+	$(WIN_CUDA_RENAME)
 
 test_gpt2fp32cu: test_gpt2_fp32.cu
 	$(NVCC) $(NVCC_FLAGS) $< $(NVCC_LDFLAGS) -o $@
+	$(WIN_CUDA_RENAME)
 
 profile_gpt2cu: profile_gpt2.cu
 	$(NVCC) $(NVCC_FLAGS) -lineinfo $< $(NVCC_LDFLAGS) -o $@
+	$(WIN_CUDA_RENAME)
 
 clean:
-	rm -f train_gpt2 test_gpt2 train_gpt2cu train_gpt2fp32cu test_gpt2cu test_gpt2fp32cu
+	$(REMOVE_FILES) train_gpt2 test_gpt2 train_gpt2cu train_gpt2fp32cu test_gpt2cu test_gpt2fp32cu profile_gpt2cu