1 | /*-
|
---|
2 | * Copyright (c) 2004, 2005,
|
---|
3 | * Bosko Milekic <[email protected]>. All rights reserved.
|
---|
4 | *
|
---|
5 | * Redistribution and use in source and binary forms, with or without
|
---|
6 | * modification, are permitted provided that the following conditions
|
---|
7 | * are met:
|
---|
8 | * 1. Redistributions of source code must retain the above copyright
|
---|
9 | * notice unmodified, this list of conditions and the following
|
---|
10 | * disclaimer.
|
---|
11 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
12 | * notice, this list of conditions and the following disclaimer in the
|
---|
13 | * documentation and/or other materials provided with the distribution.
|
---|
14 | *
|
---|
15 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
18 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
21 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
22 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
23 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
24 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
25 | * SUCH DAMAGE.
|
---|
26 | */
|
---|
27 |
|
---|
28 | #ifndef VBOX
|
---|
29 | #include <sys/cdefs.h>
|
---|
30 | __FBSDID("$FreeBSD: src/sys/kern/kern_mbuf.c,v 1.32.2.5.2.1 2009/04/15 03:14:26 kensmith Exp $");
|
---|
31 |
|
---|
32 | #include "opt_mac.h"
|
---|
33 | #include "opt_param.h"
|
---|
34 |
|
---|
35 | #include <sys/param.h>
|
---|
36 | #include <sys/malloc.h>
|
---|
37 | #include <sys/systm.h>
|
---|
38 | #include <sys/mbuf.h>
|
---|
39 | #include <sys/domain.h>
|
---|
40 | #include <sys/eventhandler.h>
|
---|
41 | #include <sys/kernel.h>
|
---|
42 | #include <sys/protosw.h>
|
---|
43 | #include <sys/smp.h>
|
---|
44 | #include <sys/sysctl.h>
|
---|
45 |
|
---|
46 | #include <security/mac/mac_framework.h>
|
---|
47 |
|
---|
48 | #include <vm/vm.h>
|
---|
49 | #include <vm/vm_page.h>
|
---|
50 | #include <vm/uma.h>
|
---|
51 | #include <vm/uma_int.h>
|
---|
52 | #include <vm/uma_dbg.h>
|
---|
53 | #else
|
---|
54 | # include <iprt/param.h>
|
---|
55 | # include <slirp.h>
|
---|
56 | # define IN_BSD
|
---|
57 | # include "ext.h"
|
---|
58 | #endif
|
---|
59 |
|
---|
60 | /*
|
---|
61 | * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
|
---|
62 | * Zones.
|
---|
63 | *
|
---|
64 | * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
|
---|
65 | * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the
|
---|
66 | * administrator so desires.
|
---|
67 | *
|
---|
68 | * Mbufs are allocated from a UMA Master Zone called the Mbuf
|
---|
69 | * Zone.
|
---|
70 | *
|
---|
71 | * Additionally, FreeBSD provides a Packet Zone, which it
|
---|
72 | * configures as a Secondary Zone to the Mbuf Master Zone,
|
---|
73 | * thus sharing backend Slab kegs with the Mbuf Master Zone.
|
---|
74 | *
|
---|
75 | * Thus common-case allocations and locking are simplified:
|
---|
76 | *
|
---|
77 | * m_clget() m_getcl()
|
---|
78 | * | |
|
---|
79 | * | .------------>[(Packet Cache)] m_get(), m_gethdr()
|
---|
80 | * | | [ Packet ] |
|
---|
81 | * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ]
|
---|
82 | * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ]
|
---|
83 | * | \________ |
|
---|
84 | * [ Cluster Keg ] \ /
|
---|
85 | * | [ Mbuf Keg ]
|
---|
86 | * [ Cluster Slabs ] |
|
---|
87 | * | [ Mbuf Slabs ]
|
---|
88 | * \____________(VM)_________________/
|
---|
89 | *
|
---|
90 | *
|
---|
91 | * Whenever an object is allocated with uma_zalloc() out of
|
---|
92 | * one of the Zones its _ctor_ function is executed. The same
|
---|
93 | * for any deallocation through uma_zfree() the _dtor_ function
|
---|
94 | * is executed.
|
---|
95 | *
|
---|
96 | * Caches are per-CPU and are filled from the Master Zone.
|
---|
97 | *
|
---|
98 | * Whenever an object is allocated from the underlying global
|
---|
99 | * memory pool it gets pre-initialized with the _zinit_ functions.
|
---|
100 | * When the Kegs are overfull objects get decommissioned with
|
---|
101 | * _zfini_ functions and free'd back to the global memory pool.
|
---|
102 | *
|
---|
103 | */
|
---|
104 |
|
---|
105 | #ifndef VBOX
|
---|
106 | int nmbclusters; /* limits number of mbuf clusters */
|
---|
107 | int nmbjumbop; /* limits number of page size jumbo clusters */
|
---|
108 | int nmbjumbo9; /* limits number of 9k jumbo clusters */
|
---|
109 | int nmbjumbo16; /* limits number of 16k jumbo clusters */
|
---|
110 | struct mbstat mbstat;
|
---|
111 | #endif
|
---|
112 |
|
---|
113 | /*
|
---|
114 | * tunable_mbinit() has to be run before init_maxsockets() thus
|
---|
115 | * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets()
|
---|
116 | * runs at SI_ORDER_ANY.
|
---|
117 | */
|
---|
118 | static void
|
---|
119 | tunable_mbinit(void *dummy)
|
---|
120 | {
|
---|
121 | #ifdef VBOX
|
---|
122 | PNATState pData = (PNATState)dummy;
|
---|
123 | #endif
|
---|
124 | /* This has to be done before VM init. */
|
---|
125 | nmbclusters = 1024 + maxusers * 64;
|
---|
126 | nmbjumbop = nmbclusters / 2;
|
---|
127 | nmbjumbo9 = nmbjumbop / 2;
|
---|
128 | nmbjumbo16 = nmbjumbo9 / 2;
|
---|
129 | TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
|
---|
130 | }
|
---|
131 | SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
|
---|
132 |
|
---|
133 | #ifndef VBOX
|
---|
134 | /* XXX: These should be tunables. Can't change UMA limits on the fly. */
|
---|
135 | static int
|
---|
136 | sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
|
---|
137 | {
|
---|
138 | int error, newnmbclusters;
|
---|
139 |
|
---|
140 | newnmbclusters = nmbclusters;
|
---|
141 | error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
|
---|
142 | if (error == 0 && req->newptr) {
|
---|
143 | if (newnmbclusters > nmbclusters) {
|
---|
144 | nmbclusters = newnmbclusters;
|
---|
145 | uma_zone_set_max(zone_clust, nmbclusters);
|
---|
146 | EVENTHANDLER_INVOKE(nmbclusters_change);
|
---|
147 | } else
|
---|
148 | error = EINVAL;
|
---|
149 | }
|
---|
150 | return (error);
|
---|
151 | }
|
---|
152 | SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
|
---|
153 | &nmbclusters, 0, sysctl_nmbclusters, "IU",
|
---|
154 | "Maximum number of mbuf clusters allowed");
|
---|
155 |
|
---|
156 | static int
|
---|
157 | sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
|
---|
158 | {
|
---|
159 | int error, newnmbjumbop;
|
---|
160 |
|
---|
161 | newnmbjumbop = nmbjumbop;
|
---|
162 | error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
|
---|
163 | if (error == 0 && req->newptr) {
|
---|
164 | if (newnmbjumbop> nmbjumbop) {
|
---|
165 | nmbjumbop = newnmbjumbop;
|
---|
166 | uma_zone_set_max(zone_jumbop, nmbjumbop);
|
---|
167 | } else
|
---|
168 | error = EINVAL;
|
---|
169 | }
|
---|
170 | return (error);
|
---|
171 | }
|
---|
172 | SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
|
---|
173 | &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
|
---|
174 | "Maximum number of mbuf page size jumbo clusters allowed");
|
---|
175 |
|
---|
176 |
|
---|
177 | static int
|
---|
178 | sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
|
---|
179 | {
|
---|
180 | int error, newnmbjumbo9;
|
---|
181 |
|
---|
182 | newnmbjumbo9 = nmbjumbo9;
|
---|
183 | error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
|
---|
184 | if (error == 0 && req->newptr) {
|
---|
185 | if (newnmbjumbo9> nmbjumbo9) {
|
---|
186 | nmbjumbo9 = newnmbjumbo9;
|
---|
187 | uma_zone_set_max(zone_jumbo9, nmbjumbo9);
|
---|
188 | } else
|
---|
189 | error = EINVAL;
|
---|
190 | }
|
---|
191 | return (error);
|
---|
192 | }
|
---|
193 | SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
|
---|
194 | &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
|
---|
195 | "Maximum number of mbuf 9k jumbo clusters allowed");
|
---|
196 |
|
---|
197 | static int
|
---|
198 | sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
|
---|
199 | {
|
---|
200 | int error, newnmbjumbo16;
|
---|
201 |
|
---|
202 | newnmbjumbo16 = nmbjumbo16;
|
---|
203 | error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
|
---|
204 | if (error == 0 && req->newptr) {
|
---|
205 | if (newnmbjumbo16> nmbjumbo16) {
|
---|
206 | nmbjumbo16 = newnmbjumbo16;
|
---|
207 | uma_zone_set_max(zone_jumbo16, nmbjumbo16);
|
---|
208 | } else
|
---|
209 | error = EINVAL;
|
---|
210 | }
|
---|
211 | return (error);
|
---|
212 | }
|
---|
213 | SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
|
---|
214 | &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
|
---|
215 | "Maximum number of mbuf 16k jumbo clusters allowed");
|
---|
216 |
|
---|
217 |
|
---|
218 |
|
---|
219 | SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat,
|
---|
220 | "Mbuf general information and statistics");
|
---|
221 |
|
---|
222 | /*
|
---|
223 | * Zones from which we allocate.
|
---|
224 | */
|
---|
225 | uma_zone_t zone_mbuf;
|
---|
226 | uma_zone_t zone_clust;
|
---|
227 | uma_zone_t zone_pack;
|
---|
228 | uma_zone_t zone_jumbop;
|
---|
229 | uma_zone_t zone_jumbo9;
|
---|
230 | uma_zone_t zone_jumbo16;
|
---|
231 | uma_zone_t zone_ext_refcnt;
|
---|
232 |
|
---|
233 | /*
|
---|
234 | * Local prototypes.
|
---|
235 | */
|
---|
236 | static int mb_ctor_mbuf(void *, int, void *, int);
|
---|
237 | static int mb_ctor_clust(void *, int, void *, int);
|
---|
238 | static int mb_ctor_pack(void *, int, void *, int);
|
---|
239 | static void mb_dtor_mbuf(void *, int, void *);
|
---|
240 | static void mb_dtor_clust(void *, int, void *);
|
---|
241 | static void mb_dtor_pack(void *, int, void *);
|
---|
242 | static int mb_zinit_pack(void *, int, int);
|
---|
243 | static void mb_zfini_pack(void *, int);
|
---|
244 | #else
|
---|
245 | /*
|
---|
246 | * Local prototypes.
|
---|
247 | */
|
---|
248 | static int mb_ctor_mbuf(PNATState, void *, int, void *, int);
|
---|
249 | static int mb_ctor_clust(PNATState, void *, int, void *, int);
|
---|
250 | static int mb_ctor_pack(PNATState, void *, int, void *, int);
|
---|
251 | static void mb_dtor_mbuf(PNATState, void *, int, void *);
|
---|
252 | static void mb_dtor_clust(PNATState, void *, int, void *);
|
---|
253 | static void mb_dtor_pack(PNATState, void *, int, void *);
|
---|
254 | static int mb_zinit_pack(PNATState, void *, int, int);
|
---|
255 | static void mb_zfini_pack(PNATState, void *, int);
|
---|
256 | #endif
|
---|
257 |
|
---|
258 | /*static void mb_reclaim(void *); - unused */
|
---|
259 | #ifndef VBOX
|
---|
260 | static void mbuf_init(void *);
|
---|
261 | static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int);
|
---|
262 | static void mbuf_jumbo_free(void *, int, u_int8_t);
|
---|
263 | #endif
|
---|
264 |
|
---|
265 | #ifndef VBOX
|
---|
266 | static MALLOC_DEFINE(M_JUMBOFRAME, "jumboframes", "mbuf jumbo frame buffers");
|
---|
267 |
|
---|
268 | /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */
|
---|
269 | CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
|
---|
270 | #else
|
---|
271 | #define uma_zcreate(a0, a1, a2, a3, a4, a5, a6, a7) \
|
---|
272 | uma_zcreate(pData, a0, a1, a2, a3, a4, a5, a6, a7)
|
---|
273 | #endif
|
---|
274 |
|
---|
275 | /*
|
---|
276 | * Initialize FreeBSD Network buffer allocation.
|
---|
277 | */
|
---|
278 | SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
|
---|
279 | #ifndef VBOX
|
---|
280 | static void
|
---|
281 | #else
|
---|
282 | void
|
---|
283 | #endif
|
---|
284 | mbuf_init(void *dummy)
|
---|
285 | {
|
---|
286 |
|
---|
287 | /*
|
---|
288 | * Configure UMA zones for Mbufs, Clusters, and Packets.
|
---|
289 | */
|
---|
290 | #ifndef VBOX
|
---|
291 | zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
|
---|
292 | mb_ctor_mbuf, mb_dtor_mbuf,
|
---|
293 | #ifdef INVARIANTS
|
---|
294 | trash_init, trash_fini,
|
---|
295 | #else
|
---|
296 | NULL, NULL,
|
---|
297 | #endif
|
---|
298 | MSIZE - 1, UMA_ZONE_MAXBUCKET);
|
---|
299 |
|
---|
300 | zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
|
---|
301 | mb_ctor_clust, mb_dtor_clust,
|
---|
302 | #ifdef INVARIANTS
|
---|
303 | trash_init, trash_fini,
|
---|
304 | #else
|
---|
305 | NULL, NULL,
|
---|
306 | #endif
|
---|
307 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
308 | #else /*!VBOX*/
|
---|
309 | PNATState pData = (PNATState)dummy;
|
---|
310 | tunable_mbinit(pData);
|
---|
311 | zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
|
---|
312 | mb_ctor_mbuf, mb_dtor_mbuf,
|
---|
313 | NULL, NULL,
|
---|
314 | MSIZE - 1, UMA_ZONE_MAXBUCKET);
|
---|
315 | if (nmbclusters > 0)
|
---|
316 | uma_zone_set_max(zone_mbuf, nmbclusters);
|
---|
317 |
|
---|
318 | zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
|
---|
319 | mb_ctor_clust, mb_dtor_clust,
|
---|
320 | NULL, NULL,
|
---|
321 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
322 | #endif /*VBOX*/
|
---|
323 | if (nmbclusters > 0)
|
---|
324 | uma_zone_set_max(zone_clust, nmbclusters);
|
---|
325 |
|
---|
326 | zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
|
---|
327 | mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
|
---|
328 |
|
---|
329 | /* Make jumbo frame zone too. Page size, 9k and 16k. */
|
---|
330 | #ifndef VBOX
|
---|
331 | zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
|
---|
332 | mb_ctor_clust, mb_dtor_clust,
|
---|
333 | #ifdef INVARIANTS
|
---|
334 | trash_init, trash_fini,
|
---|
335 | #else
|
---|
336 | NULL, NULL,
|
---|
337 | #endif
|
---|
338 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
339 | if (nmbjumbop > 0)
|
---|
340 | uma_zone_set_max(zone_jumbop, nmbjumbop);
|
---|
341 |
|
---|
342 | zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
|
---|
343 | mb_ctor_clust, mb_dtor_clust,
|
---|
344 | #ifdef INVARIANTS
|
---|
345 | trash_init, trash_fini,
|
---|
346 | #else
|
---|
347 | NULL, NULL,
|
---|
348 | #endif
|
---|
349 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
350 | if (nmbjumbo9 > 0)
|
---|
351 | uma_zone_set_max(zone_jumbo9, nmbjumbo9);
|
---|
352 | uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
|
---|
353 | uma_zone_set_freef(zone_jumbo9, mbuf_jumbo_free);
|
---|
354 |
|
---|
355 | zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
|
---|
356 | mb_ctor_clust, mb_dtor_clust,
|
---|
357 | #ifdef INVARIANTS
|
---|
358 | trash_init, trash_fini,
|
---|
359 | #else
|
---|
360 | NULL, NULL,
|
---|
361 | #endif
|
---|
362 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
363 | if (nmbjumbo16 > 0)
|
---|
364 | uma_zone_set_max(zone_jumbo16, nmbjumbo16);
|
---|
365 | #else /*!VBOX*/
|
---|
366 | zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
|
---|
367 | mb_ctor_clust, mb_dtor_clust,
|
---|
368 | NULL, NULL,
|
---|
369 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
370 | if (nmbjumbop > 0)
|
---|
371 | uma_zone_set_max(zone_jumbop, nmbjumbop);
|
---|
372 |
|
---|
373 | zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
|
---|
374 | mb_ctor_clust, mb_dtor_clust,
|
---|
375 | NULL, NULL,
|
---|
376 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
377 | if (nmbjumbo9 > 0)
|
---|
378 | uma_zone_set_max(zone_jumbo9, nmbjumbo9);
|
---|
379 |
|
---|
380 | zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
|
---|
381 | mb_ctor_clust, mb_dtor_clust,
|
---|
382 | NULL, NULL,
|
---|
383 | UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
|
---|
384 | if (nmbjumbo16 > 0)
|
---|
385 | uma_zone_set_max(zone_jumbo16, nmbjumbo16);
|
---|
386 | #endif /*VBOX*/
|
---|
387 |
|
---|
388 | zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
|
---|
389 | NULL, NULL,
|
---|
390 | NULL, NULL,
|
---|
391 | UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
|
---|
392 |
|
---|
393 | /* uma_prealloc() goes here... */
|
---|
394 |
|
---|
395 | /*
|
---|
396 | * Hook event handler for low-memory situation, used to
|
---|
397 | * drain protocols and push data back to the caches (UMA
|
---|
398 | * later pushes it back to VM).
|
---|
399 | */
|
---|
400 | EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
|
---|
401 | EVENTHANDLER_PRI_FIRST);
|
---|
402 |
|
---|
403 | /*
|
---|
404 | * [Re]set counters and local statistics knobs.
|
---|
405 | * XXX Some of these should go and be replaced, but UMA stat
|
---|
406 | * gathering needs to be revised.
|
---|
407 | */
|
---|
408 | mbstat.m_mbufs = 0;
|
---|
409 | mbstat.m_mclusts = 0;
|
---|
410 | mbstat.m_drain = 0;
|
---|
411 | mbstat.m_msize = MSIZE;
|
---|
412 | mbstat.m_mclbytes = MCLBYTES;
|
---|
413 | mbstat.m_minclsize = MINCLSIZE;
|
---|
414 | mbstat.m_mlen = MLEN;
|
---|
415 | mbstat.m_mhlen = MHLEN;
|
---|
416 | mbstat.m_numtypes = MT_NTYPES;
|
---|
417 |
|
---|
418 | mbstat.m_mcfail = mbstat.m_mpfail = 0;
|
---|
419 | mbstat.sf_iocnt = 0;
|
---|
420 | mbstat.sf_allocwait = mbstat.sf_allocfail = 0;
|
---|
421 | }
|
---|
422 |
|
---|
423 | #ifndef VBOX
|
---|
424 | /*
|
---|
425 | * UMA backend page allocator for the jumbo frame zones.
|
---|
426 | *
|
---|
427 | * Allocates kernel virtual memory that is backed by contiguous physical
|
---|
428 | * pages.
|
---|
429 | */
|
---|
430 | static void *
|
---|
431 | mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int fWait)
|
---|
432 | {
|
---|
433 |
|
---|
434 | /* Inform UMA that this allocator uses kernel_map/object. */
|
---|
435 | *flags = UMA_SLAB_KERNEL;
|
---|
436 | return (contigmalloc(bytes, M_JUMBOFRAME, fWait, (vm_paddr_t)0,
|
---|
437 | ~(vm_paddr_t)0, 1, 0));
|
---|
438 | }
|
---|
439 |
|
---|
440 | /*
|
---|
441 | * UMA backend page deallocator for the jumbo frame zones.
|
---|
442 | */
|
---|
443 | static void
|
---|
444 | mbuf_jumbo_free(void *mem, int size, u_int8_t flags)
|
---|
445 | {
|
---|
446 |
|
---|
447 | contigfree(mem, size, M_JUMBOFRAME);
|
---|
448 | }
|
---|
449 | #endif
|
---|
450 |
|
---|
451 | /*
|
---|
452 | * Constructor for Mbuf master zone.
|
---|
453 | *
|
---|
454 | * The 'arg' pointer points to a mb_args structure which
|
---|
455 | * contains call-specific information required to support the
|
---|
456 | * mbuf allocation API. See mbuf.h.
|
---|
457 | */
|
---|
458 | static int
|
---|
459 | #ifndef VBOX
|
---|
460 | mb_ctor_mbuf(void *mem, int size, void *arg, int how)
|
---|
461 | #else
|
---|
462 | mb_ctor_mbuf(PNATState pData, void *mem, int size, void *arg, int how)
|
---|
463 | #endif
|
---|
464 | {
|
---|
465 | struct mbuf *m;
|
---|
466 | struct mb_args *args;
|
---|
467 | #ifdef MAC
|
---|
468 | int error;
|
---|
469 | #endif
|
---|
470 | int flags;
|
---|
471 | short type;
|
---|
472 | #ifdef VBOX
|
---|
473 | NOREF(pData);
|
---|
474 | #endif
|
---|
475 |
|
---|
476 | #ifdef INVARIANTS
|
---|
477 | trash_ctor(mem, size, arg, how);
|
---|
478 | #elif defined(VBOX)
|
---|
479 | NOREF(size);
|
---|
480 | NOREF(how);
|
---|
481 | #endif
|
---|
482 | m = (struct mbuf *)mem;
|
---|
483 | args = (struct mb_args *)arg;
|
---|
484 | flags = args->flags;
|
---|
485 | type = args->type;
|
---|
486 |
|
---|
487 | /*
|
---|
488 | * The mbuf is initialized later. The caller has the
|
---|
489 | * responsibility to set up any MAC labels too.
|
---|
490 | */
|
---|
491 | if (type == MT_NOINIT)
|
---|
492 | return (0);
|
---|
493 |
|
---|
494 | m->m_next = NULL;
|
---|
495 | m->m_nextpkt = NULL;
|
---|
496 | m->m_len = 0;
|
---|
497 | m->m_flags = flags;
|
---|
498 | m->m_type = type;
|
---|
499 | if (flags & M_PKTHDR) {
|
---|
500 | m->m_data = m->m_pktdat;
|
---|
501 | m->m_pkthdr.rcvif = NULL;
|
---|
502 | m->m_pkthdr.len = 0;
|
---|
503 | m->m_pkthdr.header = NULL;
|
---|
504 | m->m_pkthdr.csum_flags = 0;
|
---|
505 | m->m_pkthdr.csum_data = 0;
|
---|
506 | m->m_pkthdr.tso_segsz = 0;
|
---|
507 | m->m_pkthdr.ether_vtag = 0;
|
---|
508 | SLIST_INIT(&m->m_pkthdr.tags);
|
---|
509 | #ifdef MAC
|
---|
510 | /* If the label init fails, fail the alloc */
|
---|
511 | error = mac_init_mbuf(m, how);
|
---|
512 | if (error)
|
---|
513 | return (error);
|
---|
514 | #endif
|
---|
515 | } else
|
---|
516 | m->m_data = m->m_dat;
|
---|
517 | return (0);
|
---|
518 | }
|
---|
519 |
|
---|
520 | /*
|
---|
521 | * The Mbuf master zone destructor.
|
---|
522 | */
|
---|
523 | static void
|
---|
524 | #ifndef VBOX
|
---|
525 | mb_dtor_mbuf(void *mem, int size, void *arg)
|
---|
526 | #else
|
---|
527 | mb_dtor_mbuf(PNATState pData, void *mem, int size, void *arg)
|
---|
528 | #endif
|
---|
529 | {
|
---|
530 | struct mbuf *m;
|
---|
531 | uintptr_t flags;
|
---|
532 | #ifdef VBOX
|
---|
533 | NOREF(pData);
|
---|
534 | #endif
|
---|
535 |
|
---|
536 | m = (struct mbuf *)mem;
|
---|
537 | flags = (uintptr_t)arg;
|
---|
538 |
|
---|
539 | if ((flags & MB_NOTAGS) == 0 && (m->m_flags & M_PKTHDR) != 0)
|
---|
540 | m_tag_delete_chain(m, NULL);
|
---|
541 | KASSERT((m->m_flags & M_EXT) == 0, ("%s: M_EXT set", __func__));
|
---|
542 | KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
|
---|
543 | #ifdef INVARIANTS
|
---|
544 | trash_dtor(mem, size, arg);
|
---|
545 | #elif defined(VBOX)
|
---|
546 | NOREF(size);
|
---|
547 | NOREF(arg);
|
---|
548 | #endif
|
---|
549 | }
|
---|
550 |
|
---|
551 | /*
|
---|
552 | * The Mbuf Packet zone destructor.
|
---|
553 | */
|
---|
554 | static void
|
---|
555 | #ifndef VBOX
|
---|
556 | mb_dtor_pack(void *mem, int size, void *arg)
|
---|
557 | #else
|
---|
558 | mb_dtor_pack(PNATState pData, void *mem, int size, void *arg)
|
---|
559 | #endif
|
---|
560 | {
|
---|
561 | struct mbuf *m;
|
---|
562 |
|
---|
563 | m = (struct mbuf *)mem;
|
---|
564 | if ((m->m_flags & M_PKTHDR) != 0)
|
---|
565 | m_tag_delete_chain(m, NULL);
|
---|
566 |
|
---|
567 | /* Make sure we've got a clean cluster back. */
|
---|
568 | KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
|
---|
569 | KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
|
---|
570 | KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
|
---|
571 | KASSERT(m->m_ext.ext_args == NULL, ("%s: ext_args != NULL", __func__));
|
---|
572 | KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
|
---|
573 | KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
|
---|
574 | KASSERT(*m->m_ext.ref_cnt == 1, ("%s: ref_cnt != 1", __func__));
|
---|
575 | #ifdef INVARIANTS
|
---|
576 | trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
|
---|
577 | #elif defined(VBOX)
|
---|
578 | NOREF(size);
|
---|
579 | NOREF(arg);
|
---|
580 | #endif
|
---|
581 | /*
|
---|
582 | * If there are processes blocked on zone_clust, waiting for pages to be freed up,
|
---|
583 | * cause them to be woken up by draining the packet zone. We are exposed to a race here
|
---|
584 | * (in the check for the UMA_ZFLAG_FULL) where we might miss the flag set, but that is
|
---|
585 | * deliberate. We don't want to acquire the zone lock for every mbuf free.
|
---|
586 | */
|
---|
587 | if (uma_zone_exhausted_nolock(zone_clust))
|
---|
588 | zone_drain(zone_pack);
|
---|
589 | }
|
---|
590 |
|
---|
591 | /*
|
---|
592 | * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
|
---|
593 | *
|
---|
594 | * Here the 'arg' pointer points to the Mbuf which we
|
---|
595 | * are configuring cluster storage for. If 'arg' is
|
---|
596 | * empty we allocate just the cluster without setting
|
---|
597 | * the mbuf to it. See mbuf.h.
|
---|
598 | */
|
---|
599 | static int
|
---|
600 | #ifndef VBOX
|
---|
601 | mb_ctor_clust(void *mem, int size, void *arg, int how)
|
---|
602 | #else
|
---|
603 | mb_ctor_clust(PNATState pData, void *mem, int size, void *arg, int how)
|
---|
604 | #endif
|
---|
605 | {
|
---|
606 | struct mbuf *m;
|
---|
607 | u_int *refcnt;
|
---|
608 | int type;
|
---|
609 | uma_zone_t zone;
|
---|
610 | #ifdef VBOX
|
---|
611 | NOREF(how);
|
---|
612 | #endif
|
---|
613 |
|
---|
614 | #ifdef INVARIANTS
|
---|
615 | trash_ctor(mem, size, arg, how);
|
---|
616 | #elif defined(VBOX)
|
---|
617 | NOREF(how);
|
---|
618 | #endif
|
---|
619 | switch (size) {
|
---|
620 | case MCLBYTES:
|
---|
621 | type = EXT_CLUSTER;
|
---|
622 | zone = zone_clust;
|
---|
623 | break;
|
---|
624 | #if MJUMPAGESIZE != MCLBYTES
|
---|
625 | case MJUMPAGESIZE:
|
---|
626 | type = EXT_JUMBOP;
|
---|
627 | zone = zone_jumbop;
|
---|
628 | break;
|
---|
629 | #endif
|
---|
630 | case MJUM9BYTES:
|
---|
631 | type = EXT_JUMBO9;
|
---|
632 | zone = zone_jumbo9;
|
---|
633 | break;
|
---|
634 | case MJUM16BYTES:
|
---|
635 | type = EXT_JUMBO16;
|
---|
636 | zone = zone_jumbo16;
|
---|
637 | break;
|
---|
638 | default:
|
---|
639 | panic("unknown cluster size");
|
---|
640 | break;
|
---|
641 | }
|
---|
642 |
|
---|
643 | m = (struct mbuf *)arg;
|
---|
644 | refcnt = uma_find_refcnt(zone, mem);
|
---|
645 | *refcnt = 1;
|
---|
646 | if (m != NULL) {
|
---|
647 | m->m_ext.ext_buf = (caddr_t)mem;
|
---|
648 | m->m_data = m->m_ext.ext_buf;
|
---|
649 | m->m_flags |= M_EXT;
|
---|
650 | m->m_ext.ext_free = NULL;
|
---|
651 | m->m_ext.ext_args = NULL;
|
---|
652 | m->m_ext.ext_size = size;
|
---|
653 | m->m_ext.ext_type = type;
|
---|
654 | m->m_ext.ref_cnt = refcnt;
|
---|
655 | }
|
---|
656 |
|
---|
657 | return (0);
|
---|
658 | }
|
---|
659 |
|
---|
/*
 * The Mbuf Cluster zone destructor.
 *
 * Under INVARIANTS it asserts that the cluster's reference count is at
 * most 1 and then runs trash_dtor() on it.  In a VBOX build without
 * INVARIANTS it only marks its parameters as unused.
 */
static void
#ifndef VBOX
mb_dtor_clust(void *mem, int size, void *arg)
#else
mb_dtor_clust(PNATState pData, void *mem, int size, void *arg)
#endif
{
#ifdef INVARIANTS
	uma_zone_t owner;

	/* Look up the zone from the object size. */
	owner = m_getzone(size);
	KASSERT(*(uma_find_refcnt(owner, mem)) <= 1,
	    ("%s: refcnt incorrect %u", __func__,
	    *(uma_find_refcnt(owner, mem))));

	trash_dtor(mem, size, arg);
#elif defined(VBOX)
	NOREF(pData);
	NOREF(mem);
	NOREF(size);
	NOREF(arg);
#endif
}
686 |
|
---|
687 | /*
|
---|
688 | * The Packet secondary zone's init routine, executed on the
|
---|
689 | * object's transition from mbuf keg slab to zone cache.
|
---|
690 | */
|
---|
691 | static int
|
---|
692 | #ifndef VBOX
|
---|
693 | mb_zinit_pack(void *mem, int size, int how)
|
---|
694 | #else
|
---|
695 | mb_zinit_pack(PNATState pData, void *mem, int size, int how)
|
---|
696 | #endif
|
---|
697 | {
|
---|
698 | struct mbuf *m;
|
---|
699 |
|
---|
700 | m = (struct mbuf *)mem; /* m is virgin. */
|
---|
701 | if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
|
---|
702 | m->m_ext.ext_buf == NULL)
|
---|
703 | return (ENOMEM);
|
---|
704 | m->m_ext.ext_type = EXT_PACKET; /* Override. */
|
---|
705 | #ifdef INVARIANTS
|
---|
706 | trash_init(m->m_ext.ext_buf, MCLBYTES, how);
|
---|
707 | #elif defined(VBOX)
|
---|
708 | NOREF(size);
|
---|
709 | #endif
|
---|
710 | return (0);
|
---|
711 | }
|
---|
712 |
|
---|
713 | /*
|
---|
714 | * The Packet secondary zone's fini routine, executed on the
|
---|
715 | * object's transition from zone cache to keg slab.
|
---|
716 | */
|
---|
717 | static void
|
---|
718 | #ifndef VBOX
|
---|
719 | mb_zfini_pack(void *mem, int size)
|
---|
720 | #else
|
---|
721 | mb_zfini_pack(PNATState pData, void *mem, int size)
|
---|
722 | #endif
|
---|
723 | {
|
---|
724 | struct mbuf *m;
|
---|
725 |
|
---|
726 | m = (struct mbuf *)mem;
|
---|
727 | #ifdef INVARIANTS
|
---|
728 | trash_fini(m->m_ext.ext_buf, MCLBYTES);
|
---|
729 | #endif
|
---|
730 | uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
|
---|
731 | #ifdef INVARIANTS
|
---|
732 | trash_dtor(mem, size, NULL);
|
---|
733 | #elif defined(VBOX)
|
---|
734 | NOREF(size);
|
---|
735 | #endif
|
---|
736 | }
|
---|
737 |
|
---|
738 | /*
|
---|
739 | * The "packet" keg constructor.
|
---|
740 | */
|
---|
741 | static int
|
---|
742 | #ifndef VBOX
|
---|
743 | mb_ctor_pack(void *mem, int size, void *arg, int how)
|
---|
744 | #else
|
---|
745 | mb_ctor_pack(PNATState pData, void *mem, int size, void *arg, int how)
|
---|
746 | #endif
|
---|
747 | {
|
---|
748 | struct mbuf *m;
|
---|
749 | struct mb_args *args;
|
---|
750 | #ifdef MAC
|
---|
751 | int error;
|
---|
752 | #endif
|
---|
753 | int flags;
|
---|
754 | short type;
|
---|
755 | #ifdef VBOX
|
---|
756 | NOREF(pData);
|
---|
757 | NOREF(size);
|
---|
758 | #endif
|
---|
759 |
|
---|
760 | m = (struct mbuf *)mem;
|
---|
761 | args = (struct mb_args *)arg;
|
---|
762 | flags = args->flags;
|
---|
763 | type = args->type;
|
---|
764 |
|
---|
765 | #ifdef INVARIANTS
|
---|
766 | trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
|
---|
767 | #elif defined(VBOX)
|
---|
768 | NOREF(how);
|
---|
769 | #endif
|
---|
770 | m->m_next = NULL;
|
---|
771 | m->m_nextpkt = NULL;
|
---|
772 | m->m_data = m->m_ext.ext_buf;
|
---|
773 | m->m_len = 0;
|
---|
774 | m->m_flags = (flags | M_EXT);
|
---|
775 | m->m_type = type;
|
---|
776 |
|
---|
777 | if (flags & M_PKTHDR) {
|
---|
778 | m->m_pkthdr.rcvif = NULL;
|
---|
779 | m->m_pkthdr.len = 0;
|
---|
780 | m->m_pkthdr.header = NULL;
|
---|
781 | m->m_pkthdr.csum_flags = 0;
|
---|
782 | m->m_pkthdr.csum_data = 0;
|
---|
783 | m->m_pkthdr.tso_segsz = 0;
|
---|
784 | m->m_pkthdr.ether_vtag = 0;
|
---|
785 | SLIST_INIT(&m->m_pkthdr.tags);
|
---|
786 | #ifdef MAC
|
---|
787 | /* If the label init fails, fail the alloc */
|
---|
788 | error = mac_init_mbuf(m, how);
|
---|
789 | if (error)
|
---|
790 | return (error);
|
---|
791 | #endif
|
---|
792 | }
|
---|
793 | /* m_ext is already initialized. */
|
---|
794 |
|
---|
795 | return (0);
|
---|
796 | }
|
---|
797 |
|
---|
#if 0 /* unused */
/*
 * This is the protocol drain routine.
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 *
 * Walks every protocol of every domain and calls its pr_drain hook when
 * one is set.  The VBOX variant does nothing.
 */
static void
mb_reclaim(void *junk)
{
#ifndef VBOX
	struct domain *dom;
	struct protosw *proto;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL,
	    "mb_reclaim()");

	for (dom = domains; dom != NULL; dom = dom->dom_next) {
		for (proto = dom->dom_protosw;
		    proto < dom->dom_protoswNPROTOSW; proto++) {
			if (proto->pr_drain == NULL)
				continue;
			(*proto->pr_drain)();
		}
	}
#else
	NOREF(junk);
#endif
}
#endif /* unused */