I recently started to work on this project . It's a model about collective motion. In this code 8192 particle move in 1000000 steps and in each step each particles position and angle updates. in each step neighbors of each particle sync their angle with that specific particle. I wrote a code in python but that was so slow . then I wrote this code in go but it's still not fast . each step takes average 1.3 s which is not good . do u guys have any idea about making it faster ?
package main
import (
"bufio"
"flag"
"fmt"
"log"
"math"
"math/rand"
"os"
"runtime"
"time"
)
const (
n_particles int = 8192
n_steps int = 1000000
dt float64 = 1.0
v0 float64 = 0.5
radius float64 = 1.0
f_intensity float64 = 1.2
scale float64 = 128.0
alpha float64 = 1 / 36
)
var (
x [n_particles + 1]float64
y [n_particles + 1]float64
angles [n_particles + 1]float64
vx [n_particles + 1]float64
vy [n_particles + 1]float64
order [n_steps + 1]float64
fxk float64
fyk float64
fxi float64
fyi float64
)
func main() {
vstart := time.Now()
rsource := rand.NewSource(time.Now().UnixNano())
randomizer := rand.New(rsource)
//defining random data
for i := 0; i <= n_particles; {
x[i] = (randomizer.Float64()) * scale
y[i] = (randomizer.Float64()) * scale
angles[i] = (randomizer.Float64()) * math.Pi * 2
vx[i] = v0 * float64(math.Cos(angles[i]))
vy[i] = v0 * float64(math.Sin(angles[i]))
i = i + 1
}
for i := 0; i <= n_steps; {
start := time.Now()
cosangles := 0.0
sinangles := 0.0
for j := 0; j <= n_particles; {
x[j] = x[j] + (vx[j] * dt)
x[j] = math.Mod(x[j], scale)
y[j] = y[j] + (vy[j] * dt)
y[j] = math.Mod(x[j], scale)
j = j + 1
}
m_angles := angles
for j := 0; j <= n_particles; {
sumanglesx := 0.0
sumanglesy := 0.0
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; {
if k != j {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk) || (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk) || (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk) || (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk) || (fxi+1 == fxk && fyi-1 == fyk) {
dist := math.Pow((x[k]-x[j]), 2) + math.Pow((y[k]-y[j]), 2)
if dist < radius {
if k > j {
sx := math.Cos(angles[k])
sy := math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
} else if k < j {
sx := alpha * math.Cos(angles[k])
sy := alpha * math.Sin(angles[k])
sumanglesx = sumanglesx + sx
sumanglesy = sumanglesy + sy
}
}
}
}
k = k + 1
}
m_angles[j] = math.Atan2(sumanglesx, sumanglesy) + (f_intensity*randomizer.Float64() - f_intensity)
j = j + 1
}
angles = m_angles
for f := 0; f <= n_particles; {
vx[f] = v0 * float64(math.Cos(angles[f]))
vy[f] = v0 * float64(math.Sin(angles[f]))
f = f + 1
}
for h := 0; h <= n_particles; {
cosangles = cosangles + (math.Cos(angles[h]))
sinangles = sinangles + (math.Sin(angles[h]))
h = h + 1
}
core := (math.Sqrt(((math.Pow(cosangles, 2)) + (math.Pow(cosangles, 2))))) / float64(n_particles)
order[i] = core
duration := time.Since(start)
fmt.Println(i)
fmt.Println(duration)
i = i + 1
}
vduration := time.Since(vstart)
log.Printf("It took : %s", vduration)
f, _ := os.Create("order.txt")
for i := 0; i <= n_steps; {
s := fmt.Sprintf("%f", order[i])
w := bufio.NewWriter(f)
w.WriteString(s + "\n")
w.Flush()
i = i + 1
}
}
First some minor quick wins:
math.Pow(x, 2)
=> x * x
math.Sin
, math.Cos
=> math.Sincos
But a bigger issue is looping over n_particles2, which is wasteful given that apparently only adjacent particles interact:
for j := 0; j <= n_particles; j++ {
fxi = math.Floor(x[j])
fyi = math.Floor(y[j])
for k := 0; k <= n_particles; k++ {
fxk = math.Floor(x[k])
fyk = math.Floor(y[k])
if (fxi == fxk && fyi == fyk) || (fxi+1 == fxk && fyi == fyk)
|| (fxi-1 == fxk && fyi == fyk) || (fxi == fxk && fyi+1 == fyk)
|| (fxi == fxk && fyi-1 == fyk) || (fxi+1 == fxk && fyi+1 == fyk)
|| (fxi-1 == fxk && fyi-1 == fyk) || (fxi-1 == fxk && fyi+1 == fyk)
|| (fxi+1 == fxk && fyi-1 == fyk) {
// ...
How can you loop over adjacent particles without a N2 nested loop? Perhaps if you maintain a helper adjacency index mapping integral x/y coordinates to particles, you can loop over it in a single pass?
for i := 0; i <= n_steps; i++ {
// . . .
type intpos struct {
x, y int64
}
adjacencyIndex := make(map[intpos][]int)
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
adjacencyIndex[intpos{ix, iy}] = append(adjacencyIndex[intpos{ix, iy}], j)
}
// . . .
for j := 0; j <= n_particles; j++ {
// . . .
ix, iy := int64(math.Floor(x[j])), int64(math.Floor(y[j]))
for dx := -1; dx <= 1; dx++ {
for dy := -1; dy <= 1; dy++ {
adjacentParticles := adjacencyIndex[intpos{ix + int64(dx), iy + int64(dy)}]
for _, k := range adjacentParticles {
if j < k {
// process interaction between particles j and k
}
}
}
}
// . . .
Note that the above will result in a single interaction between each pair of adjacent particles. If you need a bidirectional interaction (i.e. j <=> k
and k <=> j
), change j < k
to j != k
.
As a further improvement you can update the adjacency index as particles move around instead of rebuilding it in each iteration.