Go High-Performance Programming: Don't Cache Large Amounts of Data in a Map

1. How It Started

While writing some code, I (阿森) ran into a requirement to notify every online user on a particular server. The user information was cached in a map, roughly as in the code below. To broadcast, I first acquired the read lock and then iterated the map with for range. But user logins and logouts on this node were fairly frequent, around a few hundred QPS, and every one of those requests has to take the write lock. That's where the problem started:
iterating a map with 300,000 entries took roughly 10ms, and all the subsequent write requests were blocked behind it.

```go
// simple cache
type User struct {
	ID int64
	// ... other fields
}

type Cache struct {
	users map[int64]*User
	mu    sync.RWMutex
}
```
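To make the contention concrete, here is a compilable sketch of that pattern; the types from the snippet above are repeated so it builds on its own, and Broadcast, SetOnline, SetOffline, and the send callback are hypothetical names I'm using for illustration, not the original production code:

```go
package cache

import "sync"

type User struct {
	ID int64
	// ... other fields
}

type Cache struct {
	mu    sync.RWMutex
	users map[int64]*User
}

// Broadcast walks every cached user under the read lock. With ~300k entries
// this range takes on the order of 10ms, and for that whole window every
// writer below sits waiting for the write lock.
func (c *Cache) Broadcast(send func(*User)) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	for _, u := range c.users {
		send(u)
	}
}

// SetOnline is called from the login path at a few hundred QPS.
func (c *Cache) SetOnline(u *User) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.users[u.ID] = u
}

// SetOffline is called from the logout path.
func (c *Cache) SetOffline(id int64) {
	c.mu.Lock()
	defer c.mu.Unlock()
	delete(c.users, id)
}
```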

So I took a look at the Go map source code:

go/src/runtime/map.go at master · golang/go (github.com)

and found that iterating a Go map is considerably less efficient than iterating an array. Roughly, map iteration works like this:

1. Iterate over the new buckets:
   1. Check whether the old bucket corresponding to this new bucket still holds data.
   2. If it does, iterate only the entries in that old bucket that will be evacuated into this new bucket;
   3. if it does not, iterate the new bucket itself.
   4. Walk all the cells in the bucket: each bucket holds 8 cells, and the key and value are read from every cell that has a key.
2. Move on to the next new bucket and repeat the steps above.
The relevant code in go/src/runtime/map.go:

```go
type hmap struct {
// Note: the format of the hmap is also encoded in cmd/compile/internal/reflectdata/reflect.go.
// Make sure this stays in sync with the compiler's definition.
count int // # live cells == size of map. Must be first (used by len() builtin)
flags uint8
B uint8 // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
hash0 uint32 // hash seed

buckets unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
nevacuate uintptr // progress counter for evacuation (buckets less than this have been evacuated)

extra *mapextra // optional fields
}

type bmap struct {
tophash [bucketCnt]uint8
}

func mapiterinit(t *maptype, h *hmap, it *hiter) {
if raceenabled && h != nil {
callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiterinit))
}

it.t = t
if h == nil || h.count == 0 {
return
}

if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
}
it.h = h

// grab snapshot of bucket state
it.B = h.B
it.buckets = h.buckets
if !t.Bucket.Pointers() {
// Allocate the current slice and remember pointers to both current and old.
// This preserves all relevant overflow buckets alive even if
// the table grows and/or overflow buckets are added to the table
// while we are iterating.
h.createOverflow()
it.overflow = h.extra.overflow
it.oldoverflow = h.extra.oldoverflow
}

// decide where to start
r := uintptr(rand())
it.startBucket = r & bucketMask(h.B)
it.offset = uint8(r >> h.B & (abi.MapBucketCount - 1))

// iterator state
it.bucket = it.startBucket

// Remember we have an iterator.
// Can run concurrently with another mapiterinit().
if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
atomic.Or8(&h.flags, iterator|oldIterator)
}

mapiternext(it)
}

func mapiternext(it *hiter) {
h := it.h
if raceenabled {
callerpc := getcallerpc()
racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiternext))
}
if h.flags&hashWriting != 0 {
fatal("concurrent map iteration and map write")
}
t := it.t
bucket := it.bucket
b := it.bptr
i := it.i
checkBucket := it.checkBucket

next:
if b == nil {
if bucket == it.startBucket && it.wrapped {
// end of iteration
it.key = nil
it.elem = nil
return
}
if h.growing() && it.B == h.B {
// Iterator was started in the middle of a grow, and the grow isn't done yet.
// If the bucket we're looking at hasn't been filled in yet (i.e. the old
// bucket hasn't been evacuated) then we need to iterate through the old
// bucket and only return the ones that will be migrated to this bucket.
oldbucket := bucket & it.h.oldbucketmask()
b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.BucketSize)))
if !evacuated(b) {
checkBucket = bucket
} else {
b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
checkBucket = noCheck
}
} else {
b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
checkBucket = noCheck
}
bucket++
if bucket == bucketShift(it.B) {
bucket = 0
it.wrapped = true
}
i = 0
}
for ; i < abi.MapBucketCount; i++ {
offi := (i + it.offset) & (abi.MapBucketCount - 1)
if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
// TODO: emptyRest is hard to use here, as we start iterating
// in the middle of a bucket. It's feasible, just tricky.
continue
}
k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.KeySize))
if t.IndirectKey() {
k = *((*unsafe.Pointer)(k))
}
e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(offi)*uintptr(t.ValueSize))
if checkBucket != noCheck && !h.sameSizeGrow() {
// Special case: iterator was started during a grow to a larger size
// and the grow is not done yet. We're working on a bucket whose
// oldbucket has not been evacuated yet. Or at least, it wasn't
// evacuated when we started the bucket. So we're iterating
// through the oldbucket, skipping any keys that will go
// to the other new bucket (each oldbucket expands to two
// buckets during a grow).
if t.ReflexiveKey() || t.Key.Equal(k, k) {
// If the item in the oldbucket is not destined for
// the current new bucket in the iteration, skip it.
hash := t.Hasher(k, uintptr(h.hash0))
if hash&bucketMask(it.B) != checkBucket {
continue
}
} else {
// Hash isn't repeatable if k != k (NaNs). We need a
// repeatable and randomish choice of which direction
// to send NaNs during evacuation. We'll use the low
// bit of tophash to decide which way NaNs go.
// NOTE: this case is why we need two evacuate tophash
// values, evacuatedX and evacuatedY, that differ in
// their low bit.
if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
continue
}
}
}
if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
!(t.ReflexiveKey() || t.Key.Equal(k, k)) {
// This is the golden data, we can return it.
// OR
// key!=key, so the entry can't be deleted or updated, so we can just return it.
// That's lucky for us because when key!=key we can't look it up successfully.
it.key = k
if t.IndirectElem() {
e = *((*unsafe.Pointer)(e))
}
it.elem = e
} else {
// The hash table has grown since the iterator was started.
// The golden data for this key is now somewhere else.
// Check the current hash table for the data.
// This code handles the case where the key
// has been deleted, updated, or deleted and reinserted.
// NOTE: we need to regrab the key as it has potentially been
// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
rk, re := mapaccessK(t, h, k)
if rk == nil {
continue // key has been deleted
}
it.key = rk
it.elem = re
}
it.bucket = bucket
if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
it.bptr = b
}
it.i = i + 1
it.checkBucket = checkBucket
return
}
b = b.overflow(t)
i = 0
goto next
}
```

Array iteration, by contrast, is easy to understand, so I won't paste the source here: you simply find the start of the array and scan straight through a contiguous block of memory.

With 500,000 entries stored, iterating an array is roughly 10x as fast as iterating a map, and the advantage grows with the data size.
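The gap is easy to reproduce with a rough micro-benchmark like the sketch below. This is my own illustration rather than the author's original measurement; benchUser is a stand-in type:

```go
package cache_test

import "testing"

type benchUser struct {
	ID   int64
	Name string
}

const n = 500_000

// buildData fills a map and a slice with the same 500k users.
func buildData() (map[int64]*benchUser, []*benchUser) {
	m := make(map[int64]*benchUser, n)
	s := make([]*benchUser, 0, n)
	for i := int64(0); i < n; i++ {
		u := &benchUser{ID: i}
		m[i] = u
		s = append(s, u)
	}
	return m, s
}

func BenchmarkRangeMap(b *testing.B) {
	m, _ := buildData()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sum int64
		for _, u := range m {
			sum += u.ID
		}
		_ = sum
	}
}

func BenchmarkRangeSlice(b *testing.B) {
	_, s := buildData()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sum int64
		for _, u := range s {
			sum += u.ID
		}
		_ = sum
	}
}
```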

2. Digging Deeper

Since array iteration beats map iteration by this much, are there other performance costs hiding as well?

I then compared the GC cost of a []User and a map[int64]User holding the same number of entries, and found that with 500,000 entries stored, the array was also about 5x cheaper for the GC than the map:

cpu: M1 Pro
memory: 32G
Go map gc x 10 cost: 235.258125ms
Slice map gc x 10 cost: 43.631375ms
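These numbers can be approximated with a small harness along the following lines. This is my reconstruction of the methodology (time 10 forced GC cycles while the data set is live), not the author's actual benchmark code:

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

type User struct {
	ID   int64
	Name string
}

func main() {
	const n = 500_000

	// Build the map case; swap this for a []*User (or []User) to measure the slice case.
	m := make(map[int64]*User, n)
	for i := int64(0); i < n; i++ {
		m[i] = &User{ID: i}
	}

	start := time.Now()
	for i := 0; i < 10; i++ {
		runtime.GC() // each forced cycle has to scan the live map
	}
	fmt.Printf("Go map gc x 10 cost: %v\n", time.Since(start))

	runtime.KeepAlive(m) // keep the data live across all GC cycles
}
```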

3. The Rework

Clearly, caching a large amount of data directly in a map is expensive both for GC and for iteration.

So I planned to combine an array and a map into a structure that iterates as fast as an array while supporting map-like insert, update, and delete:

```go
type KV[K comparable] struct {
	Key   K
	Value any
}

func (kv *KV[K]) GetKey() K {
	return kv.Key
}

type SliceMap[K comparable] struct {
	DataSlice        []*KV[K]  // contiguous storage for the actual entries
	IndexMap         map[K]int // key -> index into DataSlice
	maxIndex         int
	maxMaxNoUseCount int
	maxNoUsePercent  float64
}
```

The slice holds the actual pointers, and the map stores each key's index into the slice. This gives us the slice's efficient iteration and faster GC while keeping the map's fast insert, delete, and update. The trade-off is equally obvious: the structure costs more memory.
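To make the mechanics concrete, here is a minimal, hypothetical sketch of how Set/Get/Delete/Range could work on such a structure. The method names, the swap-with-last deletion, and the omission of the compaction-related fields (maxIndex and friends) are all my simplifications; the real implementation is in the repository linked below.

```go
package slicemap

// Simplified versions of the KV and SliceMap types above; the
// compaction-related fields (maxIndex, maxNoUsePercent, ...) are omitted.
type KV[K comparable] struct {
	Key   K
	Value any
}

type SliceMap[K comparable] struct {
	DataSlice []*KV[K]  // contiguous storage: fast to range over, cheap for GC
	IndexMap  map[K]int // key -> index into DataSlice
}

func New[K comparable]() *SliceMap[K] {
	return &SliceMap[K]{IndexMap: make(map[K]int)}
}

// Set inserts or updates a key in O(1) via the index map.
func (s *SliceMap[K]) Set(key K, value any) {
	if i, ok := s.IndexMap[key]; ok {
		s.DataSlice[i].Value = value
		return
	}
	s.DataSlice = append(s.DataSlice, &KV[K]{Key: key, Value: value})
	s.IndexMap[key] = len(s.DataSlice) - 1
}

// Get is an O(1) lookup through the index map.
func (s *SliceMap[K]) Get(key K) (any, bool) {
	i, ok := s.IndexMap[key]
	if !ok {
		return nil, false
	}
	return s.DataSlice[i].Value, true
}

// Delete swaps the target with the last element so the slice stays compact.
func (s *SliceMap[K]) Delete(key K) {
	i, ok := s.IndexMap[key]
	if !ok {
		return
	}
	last := len(s.DataSlice) - 1
	s.DataSlice[i] = s.DataSlice[last]
	s.IndexMap[s.DataSlice[i].Key] = i
	s.DataSlice = s.DataSlice[:last]
	delete(s.IndexMap, key)
}

// Range scans the contiguous slice, which is what makes broadcasts cheap.
func (s *SliceMap[K]) Range(f func(key K, value any) bool) {
	for _, kv := range s.DataSlice {
		if !f(kv.Key, kv.Value) {
			return
		}
	}
}
```

The extra fields in the author's struct (maxMaxNoUseCount, maxNoUsePercent) suggest that deletions are handled lazily and compacted in batches rather than with the swap-delete used here; check the repository for the actual strategy.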

You can find the code and documentation here: hswell/slicemap (github.com)

A like would be greatly appreciated.

Reposted from: 掘金 (Juejin)
