Skip to content

Commit

Permalink
Merge pull request #2 from mumax/master
Browse files Browse the repository at this point in the history
update my fork to match the mumax git
  • Loading branch information
jsampaio authored Jan 11, 2018
2 parents 8183e01 + 7c2b372 commit 5109559
Show file tree
Hide file tree
Showing 506 changed files with 126,120 additions and 31,415 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ mumax3.*linux*cuda*
*.*~
tmp/
*.out
.idea/
16 changes: 16 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
language: go
dist: trusty
sudo: required
install: true
env:
global:
- GOARCH=amd64
before_install:
- wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/cuda-repo-ubuntu1404_8.0.44-1_amd64.deb
- sudo dpkg -i cuda-repo-ubuntu1404_8.0.44-1_amd64.deb
- sudo apt-get -qq update
- sudo apt-get install cuda -y
script:
- go get -u github.com/barnex/fftw
- go get -u github.com/barnex/matrix
- go build ./...
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
mumax3
======
[![Build Status](https://travis-ci.org/mumax/3.svg?branch=master)](https://travis-ci.org/mumax/3)

GPU accelerated micromagnetic simulator.

Expand Down Expand Up @@ -58,7 +59,7 @@ Your binary is now at `$GOPATH/bin/mumax3`
To do all at once on Ubuntu:
```
sudo apt-get install git golang-go gcc nvidia-cuda-toolkit nvidia-cuda-dev nvidia-340 gnuplot
export GOPATH=$HOME go get -u -v github.com/mumax/3/cmd/mumax3
GOPATH=$HOME go get -u -v github.com/mumax/3/cmd/mumax3
```

Contributing
Expand Down
8 changes: 4 additions & 4 deletions bench/gpus.gplot
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
#! /usr/bin/gnuplot

set term pdf size 4in, 2in;
set term pdf size 4in, 3in;
set output "gpus.pdf"
set boxwidth 0.5
set style fill solid
set key off
set ylabel "throughput (M cells/s)"
set xtics rotate by -45
set xtics rotate by -90
#set xtics out offset 0,-1.2

set yrange[0:200]
plot "gpus.txt" u ($0+1):($2/1e6):xtic(4) w boxes, "oommf4M.txt" u (0):(4*$1**2 * $2 /$3/1e6):xtic("OOMMF(CPU)") w boxes
set yrange[0:350]
plot "gpus.txt" u ($0+1):($2/1e6):xtic(4) w boxes, "oommf4M.txt" u (0):(4*$1**2 * $2 /$3/1e6):xtic("OOMMF(CPU)") w boxes

set output

23 changes: 17 additions & 6 deletions bench/gpus.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
4.194304e+06 2.0895753045290217e+07 1.6945210251645377e-14 "GTX 650M"
4.194304e+06 6.696363509497398e+07 1.6945210251645377e-14 "M2070"
1.048576e+06 4.033703669606458e+07 1.6946205941339026e-14 "GT 755M (iMac 2013)"
4.194304e+06 5.527840160261479e+07 1.6945213680693907e-14 "GTX 860M"
4.194304e+06 6.696363509497398e+07 1.6945210251645377e-14 "TESLA M2070"
4.194304e+06 6.945448262641394e+07 1.6945209171548022e-14 "TESLA 2050"
4.194304e+06 8.771628448955536e+07 1.5722384085696076e-14 "GTX 660"
4.194304e+06 1.1436826272529118e+08 1.5722384085696076e-14 "GTX 680"
4.194304e+06 9.70511185846317e+07 1.6945209171548022e-14 "Quadro K4200"
4.194304e+06 1.1248743156367055e+08 1.5722384085696076e-14 "GTX 480"
4.194304e+06 1.2765247865136869e+08 1.5722384085696076e-14 "GTX 580"
4.194304e+06 1.1436826272529118e+08 1.5722384085696076e-14 "GTX 680"
4.194304e+06 1.205878382259874e+08 1.6945213680693907e-14 "GTX 970"
4.194304e+06 1.3222269984079185e+08 1.6945213680693907e-14 "GTX 980"
4.194304e+06 1.3064462210481596e+08 1.6945210251645377e-14 "K20XM"
4.194304e+06 1.2765247865136869e+08 1.5722384085696076e-14 "GTX 580"
4.194304e+06 1.282698958240901e+08 1.6945224358062508e-14 "GTX 1060 (mobile)"
4.194304e+06 1.3064462210481596e+08 1.6945210251645377e-14 "TESLA K20XM"
4.194304e+06 1.3222269984079185e+08 1.6945213680693907e-14 "GTX 980"
4.194304e+06 1.702974441584964e+08 1.6945224358062508e-14 "GTX 1070"
4.194304e+06 1.752769108651334e+08 1.6945209171548022e-14 "GTX TITAN BLACK FE"
4.194304e+06 1.7967269114941204e+08 1.6945210251645377e-14 "GTX TITAN"
4.194304e+06 1.8968187897582138e+08 1.6945207552204512e-14 "GTX 1080"
4.194304e+06 1.9961744689897743e+08 1.6945207552204512e-14 "GTX 980 Ti"
4.194304e+06 1.9961744689897743e+08 1.6945207552204512e-14 "GTX 980 Ti"
4.194304e+06 2.7516254149838316e+08 1.6945224358062508e-14 "GTX 1080 Ti"
4.194304e+06 2.747775864824991e+08 1.694521108780564e-14 "GTX TITAN X (Pascal)"
4.194304e+06 3.413751301174529e+08 1.6945224358062508e-14 "GTX TITAN Xp"
4.194304e+06 3.6200963642011607e+08 1.6945207552204512e-14 "TESLA P100"
11 changes: 10 additions & 1 deletion cmd/mumax3-convert/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"log"
"os"
"path"
"path/filepath"
"strconv"
"strings"

Expand Down Expand Up @@ -125,8 +126,16 @@ func main() {
log.Fatal("no output format specified (e.g.: -png)")
}

// expand wildcards which are not expanded by the shell
// (pointing a finger at cmd.exe)
var fnames []string
for _, input := range flag.Args() {
fmt.Println(input)
expanded, _ := filepath.Glob(input)
fnames = append(fnames, expanded...)
}
// read all input files and put them in the task queue
for _, fname := range flag.Args() {
for _, fname := range fnames {
for _, outp := range wantOut {
fname := fname // closure caveats
outp := outp
Expand Down
13 changes: 9 additions & 4 deletions cmd/mumax3-server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,20 @@ func main() {
go func() {
log.Println("serving at", thisAddr)

// Resolve the IPs for thisHost
thisIP, err := net.LookupHost(thisHost)
Fatal(err)

// try to listen and serve on all interfaces other than thisAddr
// this is for convenience, errors are not fatal.
_, p, err := net.SplitHostPort(thisAddr)
Fatal(err)
ips := util.InterfaceAddrs()
for _, ip := range ips {
addr := net.JoinHostPort(ip, p)
if addr != thisAddr { // skip thisAddr, will start later and is fatal on error
if !contains(thisIP, ip) { // skip thisIP, will start later and is fatal on error
go func() {
log.Println("serving at", addr)
err := http.ListenAndServe(addr, nil)
if err != nil {
log.Println("info:", err, "(but still serving other interfaces)")
Expand Down Expand Up @@ -178,7 +183,7 @@ func parseIPs() []string {
if len(split) != 4 {
log.Fatal("invalid IP address range:", s)
}
var start, stop [4]byte
var start, stop [4]uint
for i, s := range split {
split := strings.Split(s, "-")
first := atobyte(split[0])
Expand All @@ -204,13 +209,13 @@ func parseIPs() []string {
return IPs
}

// atobyte parses a as a decimal integer that must fit in a single byte
// (0..255) and returns it widened to uint.
//
// The result is a uint rather than a byte so that callers iterating over an
// inclusive range ending at 255 do not wrap around to 0.
//
// It panics with the strconv error if a is not a valid integer, and with
// "too large" if the value lies outside 0..255 (negative values included).
func atobyte(a string) uint {
	i, err := strconv.Atoi(a)
	if err != nil {
		panic(err)
	}
	// Explicit bounds check; equivalent to the former int(byte(i)) != i test.
	if i < 0 || i > 255 {
		panic("too large")
	}
	return uint(i)
}
2 changes: 2 additions & 0 deletions cmd/mumax3/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,6 @@ func printVersion() {
fmt.Print("//", cuda.GPUInfo, ", using CC", cuda.UseCC, " PTX \n")
fmt.Print("//(c) Arne Vansteenkiste, Dynamat LAB, Ghent University, Belgium", "\n")
fmt.Print("//This is free software without any warranty. See license.txt", "\n")
fmt.Print("//If you use mumax in any work or publication,", "\n")
fmt.Print("//we kindly ask you to cite the references in references.bib", "\n")
}
3 changes: 2 additions & 1 deletion cmd/mumax3/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,10 @@ func run(inFile string, gpu int, webAddr string) {

cmd := exec.Command(os.Args[0], flags...)
log.Println(os.Args[0], flags)
err := cmd.Run()
output, err := cmd.CombinedOutput()
if err != nil {
log.Println(inFile, err)
log.Printf("%s\n", output)
exitStatus.set(1)
numFailed.inc()
if *flag_failfast {
Expand Down
125 changes: 125 additions & 0 deletions cuda/copypadmul2_20.ptx
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Jul 18 02:37:37 2013 (1374107857)
// Cuda compilation tools, release 5.5, V5.5.0
//

.version 3.2
.target sm_20
.address_size 64

.file 1 "/home/ragnarok/go/src/github.com/mumax/3/cuda/copypadmul2.cu", 1508839776, 830

// copypadmul2: compiler-generated PTX (sm_20) for the kernel in copypadmul2.cu.
//
// For every thread whose (x, y, z) index lies inside the source extents, the
// kernel reads one f32 from the source array, multiplies it by a scale factor
// and stores the product into the destination array, which is indexed with
// its own x/y extents.
//
// Parameters, as used by the instructions below:
//   param_0          u64  destination base address (target of the final store)
//   param_1, param_2 u32  destination x and y extents (used in the store index)
//   param_3          u32  never loaded in this code
//   param_4          u64  source base address
//   param_5..param_7 u32  source x, y, z extents (bounds check and load index)
//   param_8          u64  optional per-element factor array; skipped when 0
//   param_9          f32  scalar factor
//   param_10         u64  optional per-element factor array; 1.0 is used when 0
.visible .entry copypadmul2(
.param .u64 copypadmul2_param_0,
.param .u32 copypadmul2_param_1,
.param .u32 copypadmul2_param_2,
.param .u32 copypadmul2_param_3,
.param .u64 copypadmul2_param_4,
.param .u32 copypadmul2_param_5,
.param .u32 copypadmul2_param_6,
.param .u32 copypadmul2_param_7,
.param .u64 copypadmul2_param_8,
.param .f32 copypadmul2_param_9,
.param .u64 copypadmul2_param_10
)
{
.reg .pred %p<8>;
.reg .s32 %r<22>;
.reg .f32 %f<15>;
.reg .s64 %rd<17>;
.reg .f64 %fd<3>;


// Load the kernel arguments and convert the four pointers to global addresses.
ld.param.u64 %rd7, [copypadmul2_param_0];
ld.param.u32 %r5, [copypadmul2_param_1];
ld.param.u32 %r6, [copypadmul2_param_2];
ld.param.u64 %rd8, [copypadmul2_param_4];
ld.param.u32 %r7, [copypadmul2_param_5];
ld.param.u32 %r8, [copypadmul2_param_6];
ld.param.u32 %r9, [copypadmul2_param_7];
ld.param.u64 %rd5, [copypadmul2_param_8];
ld.param.f32 %f13, [copypadmul2_param_9];
ld.param.u64 %rd6, [copypadmul2_param_10];
cvta.to.global.u64 %rd1, %rd7;
cvta.to.global.u64 %rd2, %rd8;
cvta.to.global.u64 %rd3, %rd6;
cvta.to.global.u64 %rd4, %rd5;
// %r1 = ntid.x * ctaid.x + tid.x  (thread index along x)
.loc 1 14 1
mov.u32 %r10, %ntid.x;
mov.u32 %r11, %ctaid.x;
mov.u32 %r12, %tid.x;
mad.lo.s32 %r1, %r10, %r11, %r12;
// %r2 = ntid.y * ctaid.y + tid.y  (thread index along y)
.loc 1 15 1
mov.u32 %r13, %ntid.y;
mov.u32 %r14, %ctaid.y;
mov.u32 %r15, %tid.y;
mad.lo.s32 %r2, %r13, %r14, %r15;
// %r3 = ntid.z * ctaid.z + tid.z  (thread index along z)
.loc 1 16 1
mov.u32 %r16, %ntid.z;
mov.u32 %r17, %ctaid.z;
mov.u32 %r18, %tid.z;
mad.lo.s32 %r3, %r16, %r17, %r18;
// Bounds check: continue only if x < param_5 && y < param_6 && z < param_7;
// otherwise branch straight to the return at BB0_7.
.loc 1 18 1
setp.lt.s32 %p1, %r1, %r7;
setp.lt.s32 %p2, %r2, %r8;
and.pred %p3, %p1, %p2;
.loc 1 18 1
setp.lt.s32 %p4, %r3, %r9;
and.pred %p5, %p3, %p4;
.loc 1 18 1
@!%p5 bra BB0_7;
bra.uni BB0_1;

BB0_1:
// %r4 = (z * param_6 + y) * param_5 + x  (linear index in the source extents)
.loc 1 19 1
mad.lo.s32 %r19, %r3, %r8, %r2;
mad.lo.s32 %r4, %r19, %r7, %r1;
// If param_8 is non-null, multiply the scalar %f13 by the f32 at element %r4
// of that array (byte offset = index * 4).
.loc 1 20 1
setp.eq.s64 %p6, %rd5, 0;
@%p6 bra BB0_3;

mul.wide.s32 %rd9, %r4, 4;
add.s64 %rd10, %rd4, %rd9;
.loc 1 20 1
ld.global.f32 %f7, [%rd10];
mul.f32 %f13, %f7, %f13;

BB0_3:
// Scale %f13 in double precision by the constant 0d3EB515370F99F6CB
// (about 1.2566e-6) and round back to f32 in %f3.
// NOTE(review): the value matches 4*pi*1e-7 (mu0) -- confirm against copypadmul2.cu.
cvt.f64.f32 %fd1, %f13;
mul.f64 %fd2, %fd1, 0d3EB515370F99F6CB;
cvt.rn.f32.f64 %f3, %fd2;
// Second optional factor %f14: 1.0 when param_10 is null, otherwise the f32
// at element %r4 of that array (multiplied by 1.0).
.loc 1 21 1
setp.ne.s64 %p7, %rd6, 0;
@%p7 bra BB0_5;

mov.f32 %f14, 0f3F800000;
bra.uni BB0_6;

BB0_5:
mul.wide.s32 %rd11, %r4, 4;
add.s64 %rd12, %rd3, %rd11;
.loc 1 21 1
ld.global.f32 %f8, [%rd12];
mul.f32 %f14, %f8, 0f3F800000;

BB0_6:
// Load the source element at index %r4, multiply it by %f3 * %f14, and store
// the product at destination index (z * param_2 + y) * param_1 + x.
mul.wide.s32 %rd13, %r4, 4;
add.s64 %rd14, %rd2, %rd13;
.loc 1 22 1
ld.global.f32 %f10, [%rd14];
mul.f32 %f11, %f3, %f14;
mul.f32 %f12, %f11, %f10;
mad.lo.s32 %r20, %r3, %r6, %r2;
mad.lo.s32 %r21, %r20, %r5, %r1;
mul.wide.s32 %rd15, %r21, 4;
add.s64 %rd16, %rd1, %rd15;
.loc 1 22 1
st.global.f32 [%rd16], %f12;

BB0_7:
// Common exit for in-range and out-of-range threads.
.loc 1 24 2
ret;
}


Loading

0 comments on commit 5109559

Please sign in to comment.