Project

General

Profile

Statistics
| Revision:

root / lab4 / .minix-src / include / netinet / tcp_vtw.h @ 13

History | View | Annotate | Download (12.4 KB)

1 13 up20180614
/*        $NetBSD: tcp_vtw.h,v 1.6 2012/11/23 14:48:31 joerg Exp $        */
2
/*
3
 * Copyright (c) 2011 The NetBSD Foundation, Inc.
4
 * All rights reserved.
5
 *
6
 * This code is derived from software contributed to The NetBSD Foundation
7
 * by Coyote Point Systems, Inc.
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28
 * POSSIBILITY OF SUCH DAMAGE.
29
 */
30
31
/*
32
 * Vestigial time-wait.
33
 *
34
 * This implementation uses cache-efficient techniques, which will
35
 * appear somewhat peculiar.  The main philosophy is to optimise the
36
 * amount of information available within a cache line.  Cache miss is
37
 * expensive.  So we employ ad-hoc techniques to pull a series of
38
 * linked-list follows into a cache line.  One cache line, multiple
39
 * linked-list equivalents.
40
 *
41
 * One such ad-hoc technique is fat pointers.  Additional degrees of
42
 * ad-hoqueness result from having to hand tune it for pointer size
43
 * and for cache line size.
44
 *
45
 * The 'fat pointer' approach aggregates, for x86_32, 15 linked-list
46
 * data structures into one cache line.  The additional 32 bits in the
47
 * cache line are used for linking fat pointers, and for
48
 * allocation/bookkeeping.
49
 *
50
 * The 15 32-bit tags encode the pointers to the linked list elements,
51
 * and also encode the results of a search comparison.
52
 *
53
 * First, some more assumptions/restrictions.
54
 *
55
 * All the fat pointers are from a contiguous allocation arena.  Thus,
56
 * we can refer to them by offset from a base, not as full pointers.
57
 *
58
 * All the linked list data elements are also from a contiguous
59
 * allocation arena, again so that we can refer to them as offset from
60
 * a base.
61
 *
62
 * In order to add a data element to a fat pointer, a key value is
63
 * computed, based on unique data within the data element.  It is the
64
 * linear searching of the linked lists of these elements based on
65
 * these unique data that are being optimised here.
66
 *
67
 * Lets call the function that computes the key k(e), where e is the
68
 * data element.  In this example, k(e) returns 32-bits.
69
 *
70
 * Consider a set E (say of order 15) of data elements.  Let K be
71
 * the set of the k(e) for e in E.
72
 *
73
 * Let O be the set of the offsets from the base of the data elements in E.
74
 *
75
 * For each x in K, for each matching o in O, let t be x ^ o.  These
76
 * are the tags. (More or less).
77
 *
78
 * In order to search all the data elements in E, we compute the
79
 * search key, and one at a time, XOR the key into the tags.  If any
80
 * result is a valid data element index, we have a possible match.  If
81
 * not, there is no match.
82
 *
83
 * The no-match cases mean we do not have to de-reference the pointer
84
 * to the data element in question.  We save cache miss penalty and
85
 * cache load decreases.  Only in the case of a valid looking data
86
 * element index, do we have to look closer.
87
 *
88
 * Thus, in the absence of false positives, 15 data elements can be
89
 * searched with one cache line fill, as opposed to 15 cache line
90
 * fills for the usual implementation.
91
 *
92
 * The vestigial time waits (vtw_t), the data elements in the above, are
93
 * searched by faddr, fport, laddr, lport.  The key is a function of
94
 * these values.
95
 *
96
 * We hash these keys into the traditional hash chains to reduce the
97
 * search time, and use fat pointers to reduce the cache impacts of
98
 * searching.
99
 *
100
 * The vtw_t are, per requirement, in a contiguous chunk.  Allocation
101
 * is done with a clock hand, and all vtw_t within one allocation
102
 * domain have the same lifetime, so they will always be sorted by
103
 * age.
104
 *
105
 * A vtw_t will be allocated, timestamped, and have a fixed future
106
 * expiration.  It will be added to a hash bucket implemented with fat
107
 * pointers, which means that a cache line will be allocated in the
108
 * hash bucket, placed at the head (more recent in time) and the vtw_t
109
 * will be added to this.  As more entries are added, the fat pointer
110
 * cache line will fill, requiring additional cache lines for fat
111
 * pointers to be allocated. These will be added at the head, and the
112
 * aged entries will hang down, tapeworm like.  As the vtw_t entries
113
 * expire, the corresponding slot in the fat pointer will be
114
 * reclaimed, and eventually the cache line will completely empty and
115
 * be re-cycled, if not at the head of the chain.
116
 *
117
 * At times, a time-wait timer is restarted.  This corresponds to
118
 * deleting the current entry and re-adding it.
119
 *
120
 * Most of the time, they are just placed here to die.
121
 */
122
#ifndef _NETINET_TCP_VTW_H
123
#define _NETINET_TCP_VTW_H
124
125
#include <sys/types.h>
126
#include <sys/socket.h>
127
#include <sys/sysctl.h>
128
#include <net/if.h>
129
#include <net/route.h>
130
#include <netinet/in.h>
131
#include <netinet/in_systm.h>
132
#include <netinet/ip.h>
133
#include <netinet/in_pcb.h>
134
#include <netinet/in_var.h>
135
#include <netinet/ip_var.h>
136
#include <netinet/in.h>
137
#include <netinet/tcp.h>
138
#include <netinet/tcp_timer.h>
139
#include <netinet/tcp_var.h>
140
#include <netinet6/in6.h>
141
#include <netinet/ip6.h>
142
#include <netinet6/ip6_var.h>
143
#include <netinet6/in6_pcb.h>
144
#include <netinet6/ip6_var.h>
145
#include <netinet6/in6_var.h>
146
#include <netinet/icmp6.h>
147
#include <netinet6/nd6.h>
148
149
#define        VTW_NCLASS        (1+3)                /* # different classes */
150
151
/*
152
 * fat pointers, MI.
153
 */
154
struct fatp_mi;
155
156
typedef uint32_t fatp_word_t;
157
158
typedef struct fatp_mi        fatp_t;
159
160
/* Supported cacheline sizes: 32 64 128 bytes.  See fatp_key(),
161
 * fatp_slot_from_key(), fatp_xtra[].
162
 */
163
#define        FATP_NTAGS        (CACHE_LINE_SIZE / sizeof(fatp_word_t) - 1)
164
#define        FATP_NXT_WIDTH        (sizeof(fatp_word_t) * NBBY - FATP_NTAGS)
165
166
#define        FATP_MAX        (1 << FATP_NXT_WIDTH)
167
168
/* Worked example: ULP32 with 64-byte cacheline (32-bit x86):
169
 * 15 tags per cacheline.  At most 2^17 fat pointers per fatp_ctl_t.
170
 * The comments on the fatp_mi members, below, correspond to the worked
171
 * example.
172
 */
173
struct fatp_mi {
174
        fatp_word_t        inuse        : FATP_NTAGS;        /* (1+15)*4 == CL_SIZE */
175
        fatp_word_t        nxt        : FATP_NXT_WIDTH;/* at most 2^17 fat pointers */
176
        fatp_word_t        tag[FATP_NTAGS];        /* 15 tags per CL */
177
};
178
179
static inline int
180
fatp_ntags(void)
181
{
182
        return FATP_NTAGS;
183
}
184
185
static inline int
186
fatp_full(fatp_t *fp)
187
{
188
        fatp_t full;
189
190
        full.inuse = (1U << FATP_NTAGS) - 1U;
191
192
        return (fp->inuse == full.inuse);
193
}
194
195
struct vtw_common;
196
struct vtw_v4;
197
struct vtw_v6;
198
struct vtw_ctl;
199
200
/*!\brief common to all vtw
201
 */
202
typedef struct vtw_common {
203
        struct timeval        expire;                /* date of birth+msl */
204
        uint32_t        key;                /* hash key: full hash */
205
        uint32_t        port_key;        /* hash key: local port hash */
206
        uint32_t        rcv_nxt;
207
        uint32_t        rcv_wnd;
208
        uint32_t        snd_nxt;
209
        uint32_t        snd_scale        : 8;        /* window scaling for send win */
210
        uint32_t        msl_class        : 2;        /* TCP MSL class {0,1,2,3} */
211
        uint32_t        reuse_port        : 1;
212
        uint32_t        reuse_addr        : 1;
213
        uint32_t        v6only                : 1;
214
        uint32_t        hashed                : 1;        /* reachable via FATP */
215
        uint32_t        uid;
216
} vtw_t;
217
218
/*!\brief vestigial timewait for IPv4
219
 */
220
typedef struct vtw_v4 {
221
        vtw_t                common;                /*  must be first */
222
        uint16_t        lport;
223
        uint16_t        fport;
224
        uint32_t        laddr;
225
        uint32_t        faddr;
226
} vtw_v4_t;
227
228
/*!\brief vestigial timewait for IPv6
229
 */
230
typedef struct vtw_v6 {
231
        vtw_t                common;                /* must be first */
232
        uint16_t        lport;
233
        uint16_t        fport;
234
        struct in6_addr        laddr;
235
        struct in6_addr        faddr;
236
} vtw_v6_t;
237
238
struct fatp_ctl;
239
typedef struct vtw_ctl                vtw_ctl_t;
240
typedef struct fatp_ctl                fatp_ctl_t;
241
242
/*
243
 * The vestigial time waits are kept in a contiguous chunk.
244
 * Allocation and free pointers run as clock hands thru this array.
245
 */
246
struct vtw_ctl {
247
        fatp_ctl_t        *fat;                /* collection of fatp to use        */
248
        vtw_ctl_t        *ctl;                /* <! controller's controller        */
249
        union {
250
                vtw_t                *v;        /* common                        */
251
                struct vtw_v4        *v4;        /* IPv4 resources                */
252
                struct vtw_v6        *v6;        /* IPv6 resources                */
253
        }                base,                /* base of vtw_t array                */
254
                /**/        lim,                /* extent of vtw_t array        */
255
                /**/        alloc,                /* allocation pointer                */
256
                /**/        oldest;                /* ^ to oldest                        */
257
        uint32_t        nfree;                /* # free                        */
258
        uint32_t        nalloc;                /* # allocated                        */
259
        uint32_t        idx_mask;        /* mask capturing all index bits*/
260
        uint32_t        is_v4        : 1;
261
        uint32_t        is_v6        : 1;
262
        uint32_t        idx_bits: 6;
263
        uint32_t        clidx        : 3;        /* <! class index */
264
};
265
266
/*!\brief Collections of fat pointers.
267
 */
268
struct fatp_ctl {
269
        vtw_ctl_t        *vtw;                /* associated VTWs                */
270
        fatp_t                *base;                /* base of fatp_t array                */
271
        fatp_t                *lim;                /* extent of fatp_t array        */
272
        fatp_t                *free;                /* free list                        */
273
        uint32_t        mask;                /* hash mask                        */
274
        uint32_t        nfree;                /* # free                        */
275
        uint32_t        nalloc;                /* # allocated                        */
276
        fatp_t                **hash;                /* hash anchors                        */
277
        fatp_t                **port;                /* port hash anchors                */
278
};
279
280
/*!\brief stats
281
 */
282
struct vtw_stats {
283
        uint64_t        ins;                /* <! inserts */
284
        uint64_t        del;                /* <! deleted */
285
        uint64_t        kill;                /* <! assassination */
286
        uint64_t        look[2];        /* <! lookup: full hash, port hash */
287
        uint64_t        hit[2];                /* <! lookups that hit */
288
        uint64_t        miss[2];        /* <! lookups that miss */
289
        uint64_t        probe[2];        /* <! hits+miss */
290
        uint64_t        losing[2];        /* <! misses requiring dereference */
291
        uint64_t        max_chain[2];        /* <! max fatp chain traversed */
292
        uint64_t        max_probe[2];        /* <! max probes in any one chain */
293
        uint64_t        max_loss[2];        /* <! max losing probes in any one
294
                                         * chain
295
                                         */
296
};
297
298
typedef struct vtw_stats        vtw_stats_t;
299
300
/*!\brief        follow fatp next 'pointer'
301
 */
302
static inline fatp_t *
303
fatp_next(fatp_ctl_t *fat, fatp_t *fp)
304
{
305
        return fp->nxt ? fat->base + fp->nxt-1 : 0;
306
}
307
308
/*!\brief determine a collection-relative fat pointer index.
309
 */
310
static inline uint32_t
311
fatp_index(fatp_ctl_t *fat, fatp_t *fp)
312
{
313
        return fp ? 1 + (fp - fat->base) : 0;
314
}
315
316
317
static inline uint32_t
318
v4_tag(uint32_t faddr, uint32_t fport, uint32_t laddr, uint32_t lport)
319
{
320
        return (ntohl(faddr)   + ntohs(fport)
321
                + ntohl(laddr) + ntohs(lport));
322
}
323
324
static inline uint32_t
325
v6_tag(const struct in6_addr *faddr, uint16_t fport,
326
       const struct in6_addr *laddr, uint16_t lport)
327
{
328
#ifdef IN6_HASH
329
        return IN6_HASH(faddr, fport, laddr, lport);
330
#else
331
        return 0;
332
#endif
333
}
334
335
static inline uint32_t
336
v4_port_tag(uint16_t lport)
337
{
338
        uint32_t tag = lport ^ (lport << 11);
339
340
        tag ^= tag << 3;
341
        tag += tag >> 5;
342
        tag ^= tag << 4;
343
        tag += tag >> 17;
344
        tag ^= tag << 25;
345
        tag += tag >> 6;
346
347
        return tag;
348
}
349
350
static inline uint32_t
351
v6_port_tag(uint16_t lport)
352
{
353
        return v4_port_tag(lport);
354
}
355
356
struct tcpcb;
357
struct tcphdr;
358
359
int  vtw_add(int, struct tcpcb *);
360
void vtw_del(vtw_ctl_t *, vtw_t *);
361
int vtw_lookup_v4(const struct ip *ip, const struct tcphdr *th,
362
                  uint32_t faddr, uint16_t fport,
363
                  uint32_t laddr, uint16_t lport);
364
struct ip6_hdr;
365
struct in6_addr;
366
367
int vtw_lookup_v6(const struct ip6_hdr *ip, const struct tcphdr *th,
368
                  const struct in6_addr *faddr, uint16_t fport,
369
                  const struct in6_addr *laddr, uint16_t lport);
370
371
typedef struct vestigial_inpcb {
372
        union {
373
                struct in_addr        v4;
374
                struct in6_addr        v6;
375
        } faddr, laddr;
376
        uint16_t                fport, lport;
377
        uint32_t                valid                : 1;
378
        uint32_t                v4                : 1;
379
        uint32_t                reuse_addr        : 1;
380
        uint32_t                reuse_port        : 1;
381
        uint32_t                v6only                : 1;
382
        uint32_t                more_tbd        : 1;
383
        uint32_t                uid;
384
        uint32_t                rcv_nxt;
385
        uint32_t                rcv_wnd;
386
        uint32_t                snd_nxt;
387
        struct vtw_common        *vtw;
388
        struct vtw_ctl                *ctl;
389
} vestigial_inpcb_t;
390
391
#ifdef _KERNEL
392
void vtw_restart(vestigial_inpcb_t*);
393
int vtw_earlyinit(void);
394
int sysctl_tcp_vtw_enable(SYSCTLFN_PROTO);
395
#endif /* _KERNEL */
396
397
#ifdef VTW_DEBUG
398
typedef struct sin_either {
399
        uint8_t                sin_len;
400
        uint8_t                sin_family;
401
        uint16_t        sin_port;
402
        union {
403
                struct in_addr        v4;
404
                struct in6_addr        v6;
405
        }                sin_addr;
406
} sin_either_t;
407
408
int vtw_debug_add(int af, sin_either_t *, sin_either_t *, int, int);
409
410
typedef struct vtw_sysargs {
411
        uint32_t        op;
412
        sin_either_t        fa;
413
        sin_either_t        la;
414
} vtw_sysargs_t;
415
416
#endif /* VTW_DEBUG */
417
418
#endif /* _NETINET_TCP_VTW_H */