I am trying to solve leetcode problem permutations.
But when i test with -benchmem, i found it allocs too much which reach 1957 allocs/op when permute([]int{1,2,3,4,5,6})
I found it escape to heap when generating sub-nums target. Even i try to allocate [6]int, and use unsafe package to build the slice, it still moved to heap
.
My question is, why the slice escape to heap, and how could i allocate the slice on stack?
Here's my code:
package main
import (
"fmt"
"reflect"
"unsafe"
)
func permute(nums []int) [][]int {
resLen := 1
for i := 1; i<= len(nums);i ++{
resLen *= i
}
// pre allocate
res := make([][]int, resLen)
for i := range res{
res[i] = make([]int, 0, len(nums))
}
build(res, nums)
return res
}
func build(res [][]int,targets []int){
step := len(res) / len(targets)
for i := range targets{
for j := i*step; j < (i+1) * step; j ++{
res[j] = append(res[j], targets[i])
}
if len(targets) != 1{
var ab = [6]int{}
var buff []int
var bp *reflect.SliceHeader
bp = (*reflect.SliceHeader)(unsafe.Pointer(&buff))
bp.Data = uintptr(unsafe.Pointer(&ab))
bp.Cap = 6
buff = append(buff, targets[:i]...)
buff = append(buff, targets[i+1:]...)
build(res[i*step:(i+1)*step], buff)
}
}
return
}
func main() {
nums := []int{1,2,3}
res := permute(nums)
fmt.Println(res)
}
build
function without unsafe but escapes to heap:
func build(res [][]int, targets []int) {
step := len(res) / len(targets)
for i := range targets {
for j := i * step; j < (i+1)*step; j++ {
res[j] = append(res[j], targets[i])
}
if len(targets) != 1 {
buff := make([]int, 0, 6) // make([]int, 0, 6) escapes to heap
buff = append(buff, targets[:i]...)
buff = append(buff, targets[i+1:]...)
build(res[i*step:(i+1)*step], buff)
}
}
return
}
And my test case:
package main
import "testing"
func Benchmark(b *testing.B){
for i:=0;i<b.N;i++{
permute([]int{1,2,3,4,5,6})
}
}
When i run go build -gcflags="-m"
, it reports ./main.go:32:8: moved to heap: ab
Trying to subvert the compiler using unsafe.Pointer
is only making it harder for the escape analysis to do its job, preventing the slice from being stack allocated. Simply allocate a single slice and reuse it for each loop iteration:
func build(res [][]int, targets []int) {
buff := make([]int, 0, 6)
step := len(res) / len(targets)
for i := range targets {
buff = buff[:0]
for j := i * step; j < (i+1)*step; j++ {
res[j] = append(res[j], targets[i])
}
if len(targets) != 1 {
buff = append(buff, targets[:i]...)
buff = append(buff, targets[i+1:]...)
build(res[i*step:(i+1)*step], buff)
}
}
return
}
This can be correctly optimized by the compiler
./main.go:26:17: make([]int, 0, 6) does not escape
And will result in only the desired allocations:
Benchmark-8 44607 26838 ns/op 52992 B/op 721 allocs/op