1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/debug.h>
29#include <sys/param.h>
30#include <sys/stat.h>
31#include <sys/systm.h>
32#include <sys/socket.h>
33#include <sys/stream.h>
34#include <sys/stropts.h>
35#include <sys/errno.h>
36#include <sys/time.h>
37#include <sys/cmn_err.h>
38#include <sys/sdt.h>
39#include <sys/conf.h>
40#include <sys/dlpi.h>
41#include <sys/ddi.h>
42#include <sys/kstat.h>
43#include <sys/strsun.h>
44#include <sys/bitmap.h>
45#include <sys/sysmacros.h>
46#include <sys/note.h>
47#include <sys/policy.h>
48#include <net/ppp_defs.h>
49#include <net/pppio.h>
50#include <net/sppptun.h>
51#include <net/pppoe.h>
52#include <netinet/in.h>
53
54#include "s_common.h"
55#include "sppptun_mod.h"
56#include "sppptun_impl.h"
57
/*
 * Default sizing values.  These are the initial settings of the
 * sppptun_init_cnt and sppptun_pctofmem tunables declared later in
 * this file.
 */
#define	NTUN_INITIAL 16			/* Initial number of sppptun slots */
#define	NTUN_PERCENT 5			/* Percent of memory to use */
60
/*
 * This is used to tag official Solaris sources.  Please do not define
 * "INTERNAL_BUILD" when building this software outside of Sun
 * Microsystems.
 *
 * Two description strings are exported either way: one for the driver
 * personality and one for the module personality.
 */
#ifdef INTERNAL_BUILD
/* MODINFO is limited to 32 characters. */
const char sppptun_driver_description[] = "PPP 4.0 tunnel driver";
const char sppptun_module_description[] = "PPP 4.0 tunnel module";
#else
const char sppptun_driver_description[] = "ANU PPP tundrv";
const char sppptun_module_description[] = "ANU PPP tunmod";

/*
 * Build stamp for non-internal builds: compile date and time, plus a
 * " DEBUG" marker when DEBUG is defined.
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably kept only so the string lands in the binary -- confirm.
 */
/* LINTED */
static const char buildtime[] = "Built " __DATE__ " at " __TIME__
#ifdef DEBUG
" DEBUG"
#endif
"\n";
#endif
81
/*
 * Tunable values; these are similar to the values used in ptms_conf.c.
 * Override these settings via /etc/system.
 *
 * tuncl_alloc() skips its limit check while fewer than sppptun_cnt
 * clients are in use.  Beyond that point the number of clients is
 * capped by tcl_minormax and, when sppptun_max_pty is non-zero, by
 * the smaller of the two.
 */
uint_t	sppptun_cnt = 0;		/* Minimum number of tunnels */
size_t	sppptun_max_pty = 0;		/* Maximum number of tunnels */
uint_t	sppptun_init_cnt = NTUN_INITIAL; /* Initial number of tunnel slots */
uint_t	sppptun_pctofmem = NTUN_PERCENT; /* Percent of memory to use */
90
/*
 * Destination address block placed directly after the
 * dl_unitdata_req_t header when sending through a PPPoE lower
 * stream: the peer's Ethernet address followed by the SAP.
 */
typedef struct ether_dest_s {
	/* destination Ethernet (MAC) address */
	ether_addr_t addr;
	/* DLPI SAP taken from the lower stream; host byte order */
	ushort_t type;
} ether_dest_t;
95
/*
 * Assemble a 32-bit big-endian value from the four consecutive bytes
 * at x.  The value is built one byte at a time, so x does not need to
 * be aligned.
 *
 * Each byte is widened to uint32_t before it is shifted; shifting a
 * promoted int would move a top byte of 0x80 or above into the sign
 * bit, which is undefined behavior and sign-extends when the result
 * is widened.  The uint8_t cast keeps plain (possibly signed) char
 * buffers from sign-extending as well.  The result has type uint32_t.
 */
#define	GETLONG(x)	\
	(((uint32_t)(uint8_t)(x)[0] << 24) | \
	((uint32_t)(uint8_t)(x)[1] << 16) | \
	((uint32_t)(uint8_t)(x)[2] << 8) | \
	(uint32_t)(uint8_t)(x)[3])
98
/*
 * Names of the named kstats kept for lower streams (tll) and for
 * clients (tcl).  sppptun_open() hands these to kstat_setup() together
 * with the matching tll_kstats / tcl_kstats storage.
 */
static const char *tll_kstats_list[] = { TLL_KSTATS_NAMES };
static const char *tcl_kstats_list[] = { TCL_KSTATS_NAMES };
101
/*
 * Helpers for updating named kstat counters.
 *
 * KREF yields the 64-bit counter "vn" inside kstat member "m" of the
 * structure that p points to; it is an lvalue.  KINCR and KDECR
 * pre-increment and pre-decrement that counter.  The pointer argument
 * and the expansions are parenthesized so that any pointer expression
 * may be passed and the result can be used inside a larger expression.
 *
 * KLINCR/KLDECR and KCINCR/KCDECR are shorthands that expect a local
 * variable named "tll" or "tcl", respectively, to be in scope.
 */
#define	KREF(p, m, vn)	((p)->m.vn.value.ui64)
#define	KINCR(p, m, vn)	(++KREF(p, m, vn))
#define	KDECR(p, m, vn)	(--KREF(p, m, vn))

#define	KLINCR(vn)	KINCR(tll, tll_kstats, vn)
#define	KLDECR(vn)	KDECR(tll, tll_kstats, vn)

#define	KCINCR(vn)	KINCR(tcl, tcl_kstats, vn)
#define	KCDECR(vn)	KDECR(tcl, tcl_kstats, vn)
111
/*
 * STREAMS entry points.  The "u" routines are installed on the upper
 * read/write queues and the "l" routines on the lower (multiplexor)
 * read/write queues; see the qinit tables later in this file.
 */
static int	sppptun_open(queue_t *, dev_t *, int, int, cred_t *);
static int	sppptun_close(queue_t *);
static void	sppptun_urput(queue_t *, mblk_t *);
static void	sppptun_uwput(queue_t *, mblk_t *);
static int	sppptun_ursrv(queue_t *);
static int	sppptun_uwsrv(queue_t *);
static void	sppptun_lrput(queue_t *, mblk_t *);
static void	sppptun_lwput(queue_t *, mblk_t *);
120
/*
 * This is the hash table of clients.  Clients are the programs that
 * open /dev/sppptun as a device.  There may be a large number of
 * these; one per tunneled PPP session.
 *
 * Note: slots are offset from minor node value by 1 because
 * vmem_alloc returns 0 for failure.  Minor number N therefore lives
 * in tcl_slots[N - 1].
 *
 * The tcl_slots array entries are modified only when exclusive on
 * both inner and outer perimeters.  This ensures that threads on
 * shared perimeters always view this as unchanging memory with no
 * need to lock around accesses.  (Specifically, the tcl_slots array
 * is modified by entry to sppptun_open, sppptun_close, and _fini.)
 */
static tuncl_t **tcl_slots = NULL;	/* Slots for tuncl_t */
static size_t tcl_nslots = 0;		/* Size of slot array */
static size_t tcl_minormax = 0;		/* Maximum number of tunnels */
static size_t tcl_inuse = 0;		/* # of tunnels currently allocated */
/* Taken as writer by tuncl_alloc() and tuncl_free() around table updates */
static krwlock_t tcl_rwlock;
static struct kmem_cache *tcl_cache = NULL;	/* tunnel cache */
static vmem_t *tcl_minor_arena = NULL; /* Arena for device minors */
142
/*
 * This is the simple list of lower layers.  For PPPoE, there is one
 * of these per Ethernet interface.  Lower layers are established by
 * "plumbing" -- using I_PLINK to connect the tunnel multiplexor to
 * the physical interface.
 *
 * tunll_list is the head of a circular queue; sppptun_open() appends
 * each new lower stream with insque() and sppptun_close() removes it
 * with remque().  tunll_index is post-incremented on every module
 * open to give each lower stream its tll_index.
 */
static struct qelem tunll_list;
static int tunll_index;

/* Test value; if all zeroes, then address hasn't been set yet. */
static const ether_addr_t zero_mac_addr = { 0, 0, 0, 0, 0, 0 };
154
/*
 * NOTE(review): the use of this size is not visible in this part of
 * the file; presumably the minimum acceptable fast-path unitdata
 * request length -- confirm against its users.
 */
#define	MIN_SET_FASTPATH_UNITDATAREQ_SIZE	\
	(sizeof (dl_unitdata_req_t) + 4)

/* Values for the module_info structure shared by all four queues. */
#define	TUN_MI_ID	2104	/* officially allocated module ID */
#define	TUN_MI_MINPSZ	(0)
#define	TUN_MI_MAXPSZ	(PPP_MAXMTU)
#define	TUN_MI_HIWAT	(PPP_MTU * 8)
#define	TUN_MI_LOWAT	(128)

/* Module information referenced by every qinit table in this file. */
static struct module_info sppptun_modinfo = {
	TUN_MI_ID,		/* mi_idnum */
	PPP_TUN_NAME,		/* mi_idname */
	TUN_MI_MINPSZ,		/* mi_minpsz */
	TUN_MI_MAXPSZ,		/* mi_maxpsz */
	TUN_MI_HIWAT,		/* mi_hiwat */
	TUN_MI_LOWAT		/* mi_lowat */
};
172
/*
 * Queue initialization tables.  The put procedures are declared as
 * returning void in this file, hence the casts to the qi_putp type.
 *
 * Upper read side: the only table that carries the open and close
 * routines; it also has a service procedure.
 */
static struct qinit sppptun_urinit = {
	(int (*)())sppptun_urput, /* qi_putp */
	sppptun_ursrv,		/* qi_srvp */
	sppptun_open,		/* qi_qopen */
	sppptun_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Upper write side: put and service procedures only. */
static struct qinit sppptun_uwinit = {
	(int (*)())sppptun_uwput, /* qi_putp */
	sppptun_uwsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (multiplexor) read side: put procedure only, no service routine. */
static struct qinit sppptun_lrinit = {
	(int (*)())sppptun_lrput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Lower (multiplexor) write side: put procedure only, no service routine. */
static struct qinit sppptun_lwinit = {
	(int (*)())sppptun_lwput, /* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&sppptun_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};
212
/*
 * This is referenced in sppptun_mod.c.
 *
 * Ties the four qinit tables together: the upper read/write pair and
 * the multiplexor (lower) read/write pair.
 */
struct streamtab sppptun_tab = {
	&sppptun_urinit,	/* st_rdinit */
	&sppptun_uwinit,	/* st_wrinit */
	&sppptun_lrinit,	/* st_muxrinit */
	&sppptun_lwinit		/* st_muxwrinit */
};
222
223/*
224 * Allocate another slot table twice as large as the original one
225 * (limited to global maximum).  Migrate all tunnels to the new slot
226 * table and free the original one.  Assumes we're exclusive on both
227 * inner and outer perimeters, and thus there are no other users of
228 * the tcl_slots array.
229 */
230static minor_t
231tcl_grow(void)
232{
233	minor_t old_size = tcl_nslots;
234	minor_t new_size = 2 * old_size;
235	tuncl_t **tcl_old = tcl_slots;
236	tuncl_t **tcl_new;
237	void  *vaddr;			/* vmem_add return value */
238
239	ASSERT(RW_LOCK_HELD(&tcl_rwlock));
240
241	/* Allocate new ptms array */
242	tcl_new = kmem_zalloc(new_size * sizeof (tuncl_t *), KM_NOSLEEP);
243	if (tcl_new == NULL)
244		return ((minor_t)0);
245
246	/* Increase clone index space */
247	vaddr = vmem_add(tcl_minor_arena, (void*)((uintptr_t)old_size + 1),
248	    new_size - old_size, VM_NOSLEEP);
249
250	if (vaddr == NULL) {
251		kmem_free(tcl_new, new_size * sizeof (tuncl_t *));
252		return ((minor_t)0);
253	}
254
255	/* Migrate tuncl_t entries to a new location */
256	tcl_nslots = new_size;
257	bcopy(tcl_old, tcl_new, old_size * sizeof (tuncl_t *));
258	tcl_slots = tcl_new;
259	kmem_free(tcl_old, old_size * sizeof (tuncl_t *));
260
261	/* Allocate minor number and return it */
262	return ((minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1, VM_NOSLEEP));
263}
264
265/*
266 * Allocate new minor number and tunnel client entry.  Returns the new
267 * entry or NULL if no memory or maximum number of entries reached.
268 * Assumes we're exclusive on both inner and outer perimeters, and
269 * thus there are no other users of the tcl_slots array.
270 */
271static tuncl_t *
272tuncl_alloc(int wantminor)
273{
274	minor_t dminor;
275	tuncl_t *tcl = NULL;
276
277	rw_enter(&tcl_rwlock, RW_WRITER);
278
279	ASSERT(tcl_slots != NULL);
280
281	/*
282	 * Always try to allocate new pty when sppptun_cnt minimum
283	 * limit is not achieved. If it is achieved, the maximum is
284	 * determined by either user-specified value (if it is
285	 * non-zero) or our memory estimations - whatever is less.
286	 */
287	if (tcl_inuse >= sppptun_cnt) {
288		/*
289		 * When system achieved required minimum of tunnels,
290		 * check for the denial of service limits.
291		 *
292		 * Get user-imposed maximum, if configured, or
293		 * calculated memory constraint.
294		 */
295		size_t user_max = (sppptun_max_pty == 0 ? tcl_minormax :
296		    min(sppptun_max_pty, tcl_minormax));
297
298		/* Do not try to allocate more than allowed */
299		if (tcl_inuse >= user_max) {
300			rw_exit(&tcl_rwlock);
301			return (NULL);
302		}
303	}
304	tcl_inuse++;
305
306	/*
307	 * Allocate new minor number. If this fails, all slots are
308	 * busy and we need to grow the hash.
309	 */
310	if (wantminor <= 0) {
311		dminor = (minor_t)(uintptr_t)vmem_alloc(tcl_minor_arena, 1,
312		    VM_NOSLEEP);
313		if (dminor == 0) {
314			/* Grow the cache and retry allocation */
315			dminor = tcl_grow();
316		}
317	} else {
318		dminor = (minor_t)(uintptr_t)vmem_xalloc(tcl_minor_arena, 1,
319		    0, 0, 0, (void *)(uintptr_t)wantminor,
320		    (void *)((uintptr_t)wantminor+1), VM_NOSLEEP);
321		if (dminor != 0 && dminor != wantminor) {
322			vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor,
323			    1);
324			dminor = 0;
325		}
326	}
327
328	if (dminor == 0) {
329		/* Not enough memory now */
330		tcl_inuse--;
331		rw_exit(&tcl_rwlock);
332		return (NULL);
333	}
334
335	tcl = kmem_cache_alloc(tcl_cache, KM_NOSLEEP);
336	if (tcl == NULL) {
337		/* Not enough memory - this entry can't be used now. */
338		vmem_free(tcl_minor_arena, (void *)(uintptr_t)dminor, 1);
339		tcl_inuse--;
340	} else {
341		bzero(tcl, sizeof (*tcl));
342		tcl->tcl_lsessid = dminor;
343		ASSERT(tcl_slots[dminor - 1] == NULL);
344		tcl_slots[dminor - 1] = tcl;
345	}
346
347	rw_exit(&tcl_rwlock);
348	return (tcl);
349}
350
351/*
352 * This routine frees an upper level (client) stream by removing it
353 * from the minor number pool and freeing the state structure storage.
354 * Assumes we're exclusive on both inner and outer perimeters, and
355 * thus there are no other concurrent users of the tcl_slots array or
356 * of any entry in that array.
357 */
358static void
359tuncl_free(tuncl_t *tcl)
360{
361	rw_enter(&tcl_rwlock, RW_WRITER);
362	ASSERT(tcl->tcl_lsessid <= tcl_nslots);
363	ASSERT(tcl_slots[tcl->tcl_lsessid - 1] == tcl);
364	ASSERT(tcl_inuse > 0);
365	tcl_inuse--;
366	tcl_slots[tcl->tcl_lsessid - 1] = NULL;
367
368	if (tcl->tcl_ksp != NULL) {
369		kstat_delete(tcl->tcl_ksp);
370		tcl->tcl_ksp = NULL;
371	}
372
373	/* Return minor number to the pool of minors */
374	vmem_free(tcl_minor_arena, (void *)(uintptr_t)tcl->tcl_lsessid, 1);
375
376	/* Return tuncl_t to the cache */
377	kmem_cache_free(tcl_cache, tcl);
378	rw_exit(&tcl_rwlock);
379}
380
381/*
382 * Get tuncl_t structure by minor number.  Returns NULL when minor is
383 * out of range.  Note that lookup of tcl pointers (and use of those
384 * pointers) is safe because modification is done only when exclusive
385 * on both inner and outer perimeters.
386 */
387static tuncl_t *
388tcl_by_minor(minor_t dminor)
389{
390	tuncl_t *tcl = NULL;
391
392	if ((dminor >= 1) && (dminor <= tcl_nslots) && tcl_slots != NULL) {
393		tcl = tcl_slots[dminor - 1];
394	}
395
396	return (tcl);
397}
398
399/*
400 * Set up kstats for upper or lower stream.
401 */
402static kstat_t *
403kstat_setup(kstat_named_t *knt, const char **names, int nstat,
404    const char *modname, int unitnum)
405{
406	kstat_t *ksp;
407	char unitname[KSTAT_STRLEN];
408	int i;
409
410	for (i = 0; i < nstat; i++) {
411		kstat_set_string(knt[i].name, names[i]);
412		knt[i].data_type = KSTAT_DATA_UINT64;
413	}
414	(void) sprintf(unitname, "%s" "%d", modname, unitnum);
415	ksp = kstat_create(modname, unitnum, unitname, "net",
416	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL);
417	if (ksp != NULL) {
418		ksp->ks_data = (void *)knt;
419		kstat_install(ksp);
420	}
421	return (ksp);
422}
423
424/*
425 * sppptun_open()
426 *
427 * MT-Perimeters:
428 *    exclusive inner, exclusive outer.
429 *
430 * Description:
431 *    Common open procedure for module and driver.
432 */
433static int
434sppptun_open(queue_t *q, dev_t *devp, int oflag, int sflag, cred_t *credp)
435{
436	_NOTE(ARGUNUSED(oflag))
437
438	/* Allow a re-open */
439	if (q->q_ptr != NULL)
440		return (0);
441
442	/* In the off chance that we're on our way out, just return error */
443	if (tcl_slots == NULL)
444		return (EINVAL);
445
446	if (sflag & MODOPEN) {
447		tunll_t *tll;
448		char *cp;
449
450		/* ordinary users have no need to push this module */
451		if (secpolicy_ppp_config(credp) != 0)
452			return (EPERM);
453
454		tll = kmem_zalloc(sizeof (tunll_t), KM_SLEEP);
455
456		tll->tll_index = tunll_index++;
457
458		tll->tll_wq = WR(q);
459		tll->tll_zoneid = crgetzoneid(credp);
460
461		/* Insert at end of list */
462		insque(&tll->tll_next, tunll_list.q_back);
463		q->q_ptr = WR(q)->q_ptr = tll;
464
465		tll->tll_style = PTS_PPPOE;
466		tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
467
468		tll->tll_ksp = kstat_setup((kstat_named_t *)&tll->tll_kstats,
469		    tll_kstats_list, Dim(tll_kstats_list), "tll",
470		    tll->tll_index);
471
472		/*
473		 * Find the name of the driver somewhere beneath us.
474		 * Note that we have no driver under us until after
475		 * qprocson().
476		 */
477		qprocson(q);
478		for (q = WR(q); q->q_next != NULL; q = q->q_next)
479			;
480		cp = NULL;
481		if (q->q_qinfo != NULL && q->q_qinfo->qi_minfo != NULL)
482			cp = q->q_qinfo->qi_minfo->mi_idname;
483		if (cp != NULL && *cp == '\0')
484			cp = NULL;
485
486		/* Set initial name; user should overwrite. */
487		if (cp == NULL)
488			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
489			    PPP_TUN_NAME "%d", tll->tll_index);
490		else
491			(void) snprintf(tll->tll_name, sizeof (tll->tll_name),
492			    "%s:tun%d", cp, tll->tll_index);
493	} else {
494		tuncl_t	*tcl;
495
496		ASSERT(devp != NULL);
497		if (sflag & CLONEOPEN) {
498			tcl = tuncl_alloc(-1);
499		} else {
500			minor_t mn;
501
502			/*
503			 * Support of non-clone open (ie, mknod with
504			 * defined minor number) is supported for
505			 * testing purposes so that 'arbitrary' minor
506			 * numbers can be used.
507			 */
508			mn = getminor(*devp);
509			if (mn == 0 || (tcl = tcl_by_minor(mn)) != NULL) {
510				return (EPERM);
511			}
512			tcl = tuncl_alloc(mn);
513		}
514		if (tcl == NULL)
515			return (ENOSR);
516		tcl->tcl_rq = q;		/* save read queue pointer */
517		tcl->tcl_flags |= TCLF_ISCLIENT;	/* sanity check */
518		tcl->tcl_zoneid = crgetzoneid(credp);
519
520		q->q_ptr = WR(q)->q_ptr = (caddr_t)tcl;
521		*devp = makedevice(getmajor(*devp), tcl->tcl_lsessid);
522
523		tcl->tcl_ksp = kstat_setup((kstat_named_t *)&tcl->tcl_kstats,
524		    tcl_kstats_list, Dim(tcl_kstats_list), "tcl",
525		    tcl->tcl_lsessid);
526
527		qprocson(q);
528	}
529	return (0);
530}
531
532/*
533 * Create an appropriate control message for this client event.
534 */
535static mblk_t *
536make_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tclto)
537{
538	struct ppptun_control *ptc;
539	mblk_t *mp = allocb(sizeof (*ptc), BPRI_HI);
540
541	if (mp != NULL) {
542		MTYPE(mp) = M_PROTO;
543		ptc = (struct ppptun_control *)mp->b_wptr;
544		bzero(ptc, sizeof (*ptc));
545		mp->b_wptr += sizeof (*ptc);
546		if (tclabout != NULL) {
547			ptc->ptc_rsessid = tclabout->tcl_rsessid;
548			ptc->ptc_address = tclabout->tcl_address;
549		}
550		ptc->ptc_discrim = tclto->tcl_ctlval;
551		ptc->ptc_action = action;
552		if (tllabout != NULL) {
553			(void) strncpy(ptc->ptc_name, tllabout->tll_name,
554			    sizeof (ptc->ptc_name));
555		}
556	}
557	return (mp);
558}
559
560/*
561 * Send an appropriate control message up this client session.
562 */
563static void
564send_control(tuncl_t *tclabout, tunll_t *tllabout, int action, tuncl_t *tcl)
565{
566	mblk_t *mp;
567
568	if (tcl->tcl_rq != NULL) {
569		mp = make_control(tclabout, tllabout, action, tcl);
570		if (mp != NULL) {
571			KCINCR(cks_octrl_spec);
572			putnext(tcl->tcl_rq, mp);
573		}
574	}
575}
576
577/*
578 * If a lower stream is being unplumbed, then the upper streams
579 * connected to this lower stream must be disconnected.  This routine
580 * accomplishes this by sending M_HANGUP to data streams and M_PROTO
581 * messages to control streams.  This is called by vmem_walk, and
582 * handles a span of minor node numbers.
583 *
584 * No need to update lks_clients here; the lower stream is on its way
585 * out.
586 */
587static void
588tclvm_remove_tll(void *arg, void *firstv, size_t numv)
589{
590	tunll_t *tll = (tunll_t *)arg;
591	int minorn = (int)(uintptr_t)firstv;
592	int minormax = minorn + numv;
593	tuncl_t *tcl;
594	mblk_t *mp;
595
596	while (minorn < minormax) {
597		tcl = tcl_slots[minorn - 1];
598		ASSERT(tcl != NULL);
599		if (tcl->tcl_data_tll == tll && tcl->tcl_rq != NULL) {
600			tcl->tcl_data_tll = NULL;
601			mp = allocb(0, BPRI_HI);
602			if (mp != NULL) {
603				MTYPE(mp) = M_HANGUP;
604				putnext(tcl->tcl_rq, mp);
605				if (tcl->tcl_ctrl_tll == tll)
606					tcl->tcl_ctrl_tll = NULL;
607			}
608		}
609		if (tcl->tcl_ctrl_tll == tll) {
610			send_control(tcl, tll, PTCA_UNPLUMB, tcl);
611			tcl->tcl_ctrl_tll = NULL;
612		}
613		minorn++;
614	}
615}
616
617/*
618 * sppptun_close()
619 *
620 * MT-Perimeters:
621 *    exclusive inner, exclusive outer.
622 *
623 * Description:
624 *    Common close procedure for module and driver.
625 */
626static int
627sppptun_close(queue_t *q)
628{
629	int err;
630	void *qptr;
631	tunll_t *tll;
632	tuncl_t *tcl;
633
634	qptr = q->q_ptr;
635
636	err = 0;
637	tll = qptr;
638	if (!(tll->tll_flags & TLLF_NOTLOWER)) {
639		/* q_next is set on modules */
640		ASSERT(WR(q)->q_next != NULL);
641
642		/* unlink any clients using this lower layer. */
643		vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_remove_tll, tll);
644
645		/* tell daemon that this has been removed. */
646		if ((tcl = tll->tll_defcl) != NULL)
647			send_control(NULL, tll, PTCA_UNPLUMB, tcl);
648
649		tll->tll_flags |= TLLF_CLOSING;
650		while (!(tll->tll_flags & TLLF_CLOSE_DONE)) {
651			qenable(tll->tll_wq);
652			qwait(tll->tll_wq);
653		}
654		tll->tll_error = 0;
655		while (!(tll->tll_flags & TLLF_SHUTDOWN_DONE)) {
656			if (!qwait_sig(tll->tll_wq))
657				break;
658		}
659
660		qprocsoff(q);
661		q->q_ptr = WR(q)->q_ptr = NULL;
662		tll->tll_wq = NULL;
663		remque(&tll->tll_next);
664		err = tll->tll_error;
665		if (tll->tll_ksp != NULL)
666			kstat_delete(tll->tll_ksp);
667		kmem_free(tll, sizeof (*tll));
668	} else {
669		tcl = qptr;
670
671		/* devices are end of line; no q_next. */
672		ASSERT(WR(q)->q_next == NULL);
673
674		qprocsoff(q);
675		DTRACE_PROBE1(sppptun__client__close, tuncl_t *, tcl);
676		tcl->tcl_rq = NULL;
677		q->q_ptr = WR(q)->q_ptr = NULL;
678
679		tll = TO_TLL(tunll_list.q_forw);
680		while (tll != TO_TLL(&tunll_list)) {
681			if (tll->tll_defcl == tcl)
682				tll->tll_defcl = NULL;
683			if (tll->tll_lastcl == tcl)
684				tll->tll_lastcl = NULL;
685			tll = TO_TLL(tll->tll_next);
686		}
687		/*
688		 * If this was a normal session, then tell the daemon.
689		 */
690		if (!(tcl->tcl_flags & TCLF_DAEMON) &&
691		    (tll = tcl->tcl_ctrl_tll) != NULL &&
692		    tll->tll_defcl != NULL) {
693			send_control(tcl, tll, PTCA_DISCONNECT,
694			    tll->tll_defcl);
695		}
696
697		/* Update statistics for references being dropped. */
698		if ((tll = tcl->tcl_data_tll) != NULL) {
699			KLDECR(lks_clients);
700		}
701		if ((tll = tcl->tcl_ctrl_tll) != NULL) {
702			KLDECR(lks_clients);
703		}
704
705		tuncl_free(tcl);
706	}
707
708	return (err);
709}
710
711/*
712 * Allocate and initialize a DLPI or TPI template of the specified
713 * length.
714 */
715static mblk_t *
716pi_alloc(size_t len, int prim)
717{
718	mblk_t	*mp;
719
720	mp = allocb(len, BPRI_MED);
721	if (mp != NULL) {
722		MTYPE(mp) = M_PROTO;
723		mp->b_wptr = mp->b_rptr + len;
724		bzero(mp->b_rptr, len);
725		*(int *)mp->b_rptr = prim;
726	}
727	return (mp);
728}
729
730#define	dlpi_alloc(l, p)	pi_alloc((l), (p))
731
732/*
733 * Prepend some room to an mblk.  Try to reuse the existing buffer, if
734 * at all possible, rather than allocating a new one.  (Fast-path
735 * output should be able to use this.)
736 *
737 * (XXX why isn't this a library function ...?)
738 */
739static mblk_t *
740prependb(mblk_t *mp, size_t len, size_t align)
741{
742	mblk_t *newmp;
743
744
745	if (align == 0)
746		align = 8;
747	if (DB_REF(mp) > 1 || mp->b_datap->db_base+len > mp->b_rptr ||
748	    ((uint_t)((uintptr_t)mp->b_rptr - len) % align) != 0) {
749		if ((newmp = allocb(len, BPRI_LO)) == NULL) {
750			freemsg(mp);
751			return (NULL);
752		}
753		newmp->b_wptr = newmp->b_rptr + len;
754		newmp->b_cont = mp;
755		return (newmp);
756	}
757	mp->b_rptr -= len;
758	return (mp);
759}
760
/*
 * sppptun_outpkt()
 *
 * MT-Perimeters:
 *	shared inner, shared outer (if called from sppptun_uwput),
 *	exclusive inner, shared outer (if called from sppptun_uwsrv).
 *
 * Description:
 *    Called from sppptun_uwput or sppptun_uwsrv when processing a
 *    M_DATA, M_PROTO, or M_PCPROTO message.  For all cases, it tries
 *    to prepare the data to be sent to the module below this driver
 *    if there is a lower stream linked underneath.  If no lower
 *    stream exists, then the data will be discarded and a
 *    PTCA_UNPLUMB control message is sent back up to the client.
 *
 *    M_DATA is treated as PPP data for the client's data lower
 *    stream.  Anything else must be a single struct ppptun_control
 *    block followed by the control payload in b_cont, and goes to the
 *    client's control lower stream.
 *
 * Parameters:
 *	q   - upper write queue; q_ptr is the client (tuncl_t).
 *	mpp - in: message to send.  out: see Returns.
 *
 * Returns:
 *	pointer to queue if caller should do putnext, otherwise
 *	*mpp != NULL if message should be enqueued, otherwise
 *	*mpp == NULL if message is gone.
 */
static queue_t *
sppptun_outpkt(queue_t *q, mblk_t **mpp)
{
	mblk_t *mp;
	tuncl_t *tcl;
	tunll_t *tll;
	mblk_t *encmb;
	mblk_t *datamb;
	dl_unitdata_req_t *dur;
	queue_t *lowerq;
	poep_t *poep;
	int len;
	ether_dest_t *edestp;
	/* Whether the result is looped back up: not at all, a copy, or all. */
	enum { luNone, luCopy, luSend } loopup;
	boolean_t isdata;
	struct ppptun_control *ptc;

	mp = *mpp;
	tcl = q->q_ptr;

	/* Assume the message is consumed unless set otherwise below. */
	*mpp = NULL;
	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
		/* This should never happen on a lower layer stream */
		freemsg(mp);
		return (NULL);
	}

	isdata = (MTYPE(mp) == M_DATA);
	if (isdata) {
		tll = tcl->tcl_data_tll;
		ptc = NULL;
	} else {
		/*
		 * If data are unaligned or otherwise unsuitable, then
		 * discard.
		 *
		 * NOTE(review): the alignment test uses sizeof (ptc),
		 * the size of a pointer, not sizeof (*ptc) --
		 * presumably intentional; confirm.
		 */
		if (MBLKL(mp) != sizeof (*ptc) || DB_REF(mp) > 1 ||
		    !IS_P2ALIGNED(mp->b_rptr, sizeof (ptc))) {
			KCINCR(cks_octrl_drop);
			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
			    mblk_t *, mp);
			send_control(tcl, tcl->tcl_ctrl_tll, PTCA_BADCTRL, tcl);
			freemsg(mp);
			return (NULL);
		}
		ptc = (struct ppptun_control *)mp->b_rptr;

		/* Set stream discriminator value if not yet set. */
		if (tcl->tcl_ctlval == 0)
			tcl->tcl_ctlval = ptc->ptc_discrim;

		/* If this is a test message, then reply to caller. */
		if (ptc->ptc_action == PTCA_TEST) {
			DTRACE_PROBE2(sppptun__test, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			/* Only the control header is echoed back. */
			if (mp->b_cont != NULL) {
				freemsg(mp->b_cont);
				mp->b_cont = NULL;
			}
			ptc->ptc_discrim = tcl->tcl_ctlval;
			putnext(RD(q), mp);
			return (NULL);
		}

		/* If this one isn't for us, then discard it */
		if (tcl->tcl_ctlval != ptc->ptc_discrim) {
			DTRACE_PROBE2(sppptun__bad__discrim, tuncl_t *, tcl,
			    struct ppptun_control *, ptc);
			freemsg(mp);
			return (NULL);
		}

		/* Don't allow empty control packets. */
		tll = tcl->tcl_ctrl_tll;
		if (mp->b_cont == NULL) {
			KCINCR(cks_octrl_drop);
			DTRACE_PROBE2(sppptun__bad__control, tuncl_t *, tcl,
			    mblk_t *, mp);
			send_control(tcl, tll, PTCA_BADCTRL, tcl);
			freemsg(mp);
			return (NULL);
		}
	}

	/* No lower stream (or it has no write queue): drop and report. */
	if (tll == NULL || (lowerq = tll->tll_wq) == NULL) {
		DTRACE_PROBE3(sppptun__cannot__send, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		send_control(tcl, tll, PTCA_UNPLUMB, tcl);
		freemsg(mp);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
		}
		return (NULL);
	}

	/*
	 * If so, then try to send it down.  The lower queue is only
	 * ever detached while holding an exclusive lock on the whole
	 * driver, so we can be confident that the lower queue is
	 * still there.
	 *
	 * When the lower stream is flow controlled, the untouched
	 * message is handed back through *mpp so the caller can queue
	 * it.
	 */
	if (!bcanputnext(lowerq, mp->b_band)) {
		DTRACE_PROBE3(sppptun__flow__control, tuncl_t *, tcl,
		    tunll_t *, tll, mblk_t *, mp);
		*mpp = mp;
		return (NULL);
	}

	/*
	 * Note: DLPI and TPI expect that the first buffer contains
	 * the control (unitdata-req) header, destination address, and
	 * nothing else.  Any protocol headers must go in the next
	 * buffer.
	 */
	loopup = luNone;
	encmb = NULL;
	if (isdata) {
		/* Data addressed to our own local address loops back up. */
		if (tll->tll_alen != 0 &&
		    bcmp(&tcl->tcl_address, &tll->tll_lcladdr,
		    tll->tll_alen) == 0)
			loopup = luSend;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/*
			 * Strip address and control fields if present.
			 *
			 * NOTE(review): the first byte is read without
			 * checking that the block is non-empty;
			 * presumably callers never pass a zero-length
			 * M_DATA -- confirm.
			 */
			if (mp->b_rptr[0] == 0xFF) {
				if (MBLKL(mp) < 3) {
					/*
					 * encmb briefly holds the
					 * pulled-up copy, which
					 * replaces mp.
					 */
					encmb = msgpullup(mp, 3);
					freemsg(mp);
					if ((mp = encmb) == NULL)
						break;
				}
				mp->b_rptr += 2;
			}
			/* Broadcasting data is probably not a good idea. */
			if (tcl->tcl_address.pta_pppoe.ptma_mac[0] & 1)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;

			/* Destination address follows the request header. */
			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);
			edestp = (ether_dest_t *)(dur + 1);
			ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
			    edestp->addr);
			/* DLPI SAPs are in host byte order! */
			edestp->type = tll->tll_sap;

			/*
			 * Make sure the protocol field isn't compressed.
			 * An odd first byte means a one-byte protocol
			 * field; reserve one extra byte for the leading
			 * zero that is written below.
			 */
			len = (*mp->b_rptr & 1);
			mp = prependb(mp, sizeof (*poep) + len, POE_HDR_ALIGN);
			if (mp == NULL)
				break;
			poep = (poep_t *)mp->b_rptr;
			poep->poep_version_type = POE_VERSION;
			poep->poep_code = POECODE_DATA;
			poep->poep_session_id = htons(tcl->tcl_rsessid);
			/* Payload length excludes the PPPoE header itself. */
			poep->poep_length = htons(msgsize(mp) -
			    sizeof (*poep));
			if (len > 0)
				*(char *)(poep + 1) = '\0';
			break;

		default:
			ASSERT(0);
		}
	} else {
		/*
		 * Control side encapsulation.
		 *
		 * NOTE(review): unlike the data path above, this
		 * comparison is not guarded by tll_alen != 0.
		 */
		if (bcmp(&ptc->ptc_address, &tll->tll_lcladdr, tll->tll_alen)
		    == 0)
			loopup = luSend;
		datamb = mp->b_cont;
		switch (tll->tll_style) {
		case PTS_PPPOE:
			/*
			 * Don't allow a loopback session to establish
			 * itself.  PPPoE is broken; it uses only one
			 * session ID for both data directions, so the
			 * loopback data path can simply never work.
			 */
			if (loopup == luSend &&
			    ((poep_t *)datamb->b_rptr)->poep_code ==
			    POECODE_PADR)
				break;
			encmb = dlpi_alloc(sizeof (*dur) + sizeof (*edestp),
			    DL_UNITDATA_REQ);
			if (encmb == NULL)
				break;
			dur = (dl_unitdata_req_t *)encmb->b_rptr;
			dur->dl_dest_addr_length = sizeof (*edestp);
			dur->dl_dest_addr_offset = sizeof (*dur);

			edestp = (ether_dest_t *)(dur + 1);
			/* DLPI SAPs are in host byte order! */
			edestp->type = tll->tll_sap;

			/*
			 * If destination isn't set yet, then we have to
			 * allow anything at all.  Otherwise, force use
			 * of configured peer address.
			 */
			if (bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
			    zero_mac_addr, sizeof (zero_mac_addr)) == 0 ||
			    (tcl->tcl_flags & TCLF_DAEMON)) {
				ether_copy(ptc->ptc_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			} else {
				ether_copy(tcl->tcl_address.pta_pppoe.ptma_mac,
				    edestp->addr);
			}
			/* Reflect multicast/broadcast back up. */
			if (edestp->addr[0] & 1)
				loopup = luCopy;
			break;

		case PTS_PPTP:
			/*
			 * PPTP's control side is actually done over
			 * separate TCP connections.
			 */
		default:
			ASSERT(0);
		}
		/* The control header is done with; keep only the payload. */
		freeb(mp);
		mp = datamb;
	}
	if (mp == NULL || encmb == NULL) {
		/* Encapsulation failed somewhere above; drop and count. */
		DTRACE_PROBE1(sppptun__output__failure, tuncl_t *, tcl);
		freemsg(mp);
		freemsg(encmb);
		if (isdata) {
			tcl->tcl_stats.ppp_oerrors++;
		} else {
			KCINCR(cks_octrl_drop);
			KLINCR(lks_octrl_drop);
		}
		lowerq = NULL;
	} else {
		if (isdata) {
			tcl->tcl_stats.ppp_obytes += msgsize(mp);
			tcl->tcl_stats.ppp_opackets++;
		} else {
			KCINCR(cks_octrls);
			KLINCR(lks_octrls);
		}
		/*
		 * Chain the payload behind the unitdata request.
		 * NOTE(review): encmb == mp can occur only when the
		 * data path left the switch right after its
		 * msgpullup(); the message then goes out without a
		 * unitdata header -- confirm whether that is intended.
		 */
		if (encmb != mp)
			encmb->b_cont = mp;
		switch (loopup) {
		case luNone:
			/* Caller sends it down lowerq. */
			*mpp = encmb;
			break;
		case luCopy:
			/* Feed a copy back up, and send the original down. */
			mp = copymsg(encmb);
			if (mp != NULL)
				sppptun_urput(RD(lowerq), mp);
			*mpp = encmb;
			break;
		case luSend:
			/* Loop the message back up; nothing goes down. */
			sppptun_urput(RD(lowerq), encmb);
			lowerq = NULL;
			break;
		}
	}
	return (lowerq);
}
1052
1053/*
1054 * Enqueue a message to be sent when the lower stream is closed.  This
1055 * is done so that we're guaranteed that we always have the necessary
1056 * resources to properly detach ourselves from the system.  (If we
1057 * waited until the close was done to allocate these messages, then
1058 * the message allocation could fail, and we'd be unable to properly
1059 * detach.)
1060 */
1061static void
1062save_for_close(tunll_t *tll, mblk_t *mp)
1063{
1064	mblk_t *onc;
1065
1066	if ((onc = tll->tll_onclose) == NULL)
1067		tll->tll_onclose = mp;
1068	else {
1069		while (onc->b_next != NULL)
1070			onc = onc->b_next;
1071		onc->b_next = mp;
1072	}
1073}
1074
1075/*
1076 * Given the lower stream name, locate the state structure.  Note that
1077 * lookup of tcl pointers (and use of those pointers) is safe because
1078 * modification is done only when exclusive on both inner and outer
1079 * perimeters.
1080 */
1081static tunll_t *
1082tll_lookup_on_name(const char *dname, zoneid_t zoneid)
1083{
1084	tunll_t *tll;
1085
1086	tll = TO_TLL(tunll_list.q_forw);
1087	for (; tll != TO_TLL(&tunll_list); tll = TO_TLL(tll->tll_next))
1088		if (tll->tll_zoneid == zoneid &&
1089		    strcmp(dname, tll->tll_name) == 0)
1090			return (tll);
1091	return (NULL);
1092}
1093
1094/*
1095 * sppptun_inner_ioctl()
1096 *
1097 * MT-Perimeters:
1098 *    exclusive inner, shared outer.
1099 *
1100 * Description:
1101 *    Called by qwriter from sppptun_ioctl as the result of receiving
1102 *    a handled ioctl.
1103 */
1104static void
1105sppptun_inner_ioctl(queue_t *q, mblk_t *mp)
1106{
1107	struct iocblk *iop;
1108	int rc = 0;
1109	int len = 0;
1110	int i;
1111	tuncl_t *tcl;
1112	tunll_t *tll;
1113	union ppptun_name *ptn;
1114	struct ppptun_info *pti;
1115	struct ppptun_peer *ptp;
1116	mblk_t *mptmp;
1117	ppptun_atype *pap;
1118	struct ppp_stats64 *psp;
1119	zoneid_t zoneid;
1120
1121	iop = (struct iocblk *)mp->b_rptr;
1122	tcl = NULL;
1123	tll = q->q_ptr;
1124	if (tll->tll_flags & TLLF_NOTLOWER) {
1125		tcl = (tuncl_t *)tll;
1126		tll = NULL;
1127	}
1128
1129	DTRACE_PROBE3(sppptun__ioctl, tuncl_t *, tcl, tunll_t *, tll,
1130	    struct iocblk *, iop);
1131
1132	switch (iop->ioc_cmd) {
1133	case PPPIO_DEBUG:
1134		/*
1135		 * Debug requests are now ignored; use dtrace or wireshark
1136		 * instead.
1137		 */
1138		break;
1139
1140	case PPPIO_GETSTAT:
1141		rc = EINVAL;
1142		break;
1143
1144	case PPPIO_GETSTAT64:
1145		/* Client (device) side only */
1146		if (tcl == NULL) {
1147			rc = EINVAL;
1148			break;
1149		}
1150		mptmp = allocb(sizeof (*psp), BPRI_HI);
1151		if (mptmp == NULL) {
1152			rc = ENOSR;
1153			break;
1154		}
1155		freemsg(mp->b_cont);
1156		mp->b_cont = mptmp;
1157
1158		psp = (struct ppp_stats64 *)mptmp->b_wptr;
1159		bzero((caddr_t)psp, sizeof (*psp));
1160		psp->p = tcl->tcl_stats;
1161
1162		len = sizeof (*psp);
1163		break;
1164
1165	case PPPTUN_SNAME:
1166		/* This is done on the *module* (lower level) side. */
1167		if (tll == NULL || mp->b_cont == NULL ||
1168		    iop->ioc_count != sizeof (*ptn) ||
1169		    *mp->b_cont->b_rptr == '\0') {
1170			rc = EINVAL;
1171			break;
1172		}
1173
1174		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1175		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1176
1177		tll = tll_lookup_on_name(ptn->ptn_name, tll->tll_zoneid);
1178		if (tll != NULL) {
1179			rc = EEXIST;
1180			break;
1181		}
1182		tll = (tunll_t *)q->q_ptr;
1183		(void) strcpy(tll->tll_name, ptn->ptn_name);
1184		break;
1185
1186	case PPPTUN_SINFO:
1187	case PPPTUN_GINFO:
1188		/* Either side */
1189		if (mp->b_cont == NULL || iop->ioc_count != sizeof (*pti)) {
1190			rc = EINVAL;
1191			break;
1192		}
1193		pti = (struct ppptun_info *)mp->b_cont->b_rptr;
1194		if (pti->pti_name[0] != '\0')
1195			tll = tll_lookup_on_name(pti->pti_name,
1196			    tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid);
1197		if (tll == NULL) {
1198			/* Driver (client) side must have name */
1199			if (tcl != NULL && pti->pti_name[0] == '\0')
1200				rc = EINVAL;
1201			else
1202				rc = ESRCH;
1203			break;
1204		}
1205		if (iop->ioc_cmd == PPPTUN_GINFO) {
1206			pti->pti_muxid = tll->tll_muxid;
1207			pti->pti_style = tll->tll_style;
1208			len = sizeof (*pti);
1209			break;
1210		}
1211		tll->tll_muxid = pti->pti_muxid;
1212		tll->tll_style = pti->pti_style;
1213		switch (tll->tll_style) {
1214		case PTS_PPPOE:		/* DLPI type */
1215			tll->tll_alen = sizeof (tll->tll_lcladdr.pta_pppoe);
1216			mptmp = dlpi_alloc(sizeof (dl_unbind_req_t),
1217			    DL_UNBIND_REQ);
1218			if (mptmp == NULL) {
1219				rc = ENOSR;
1220				break;
1221			}
1222			save_for_close(tll, mptmp);
1223			mptmp = dlpi_alloc(sizeof (dl_detach_req_t),
1224			    DL_DETACH_REQ);
1225			if (mptmp == NULL) {
1226				rc = ENOSR;
1227				break;
1228			}
1229			save_for_close(tll, mptmp);
1230			break;
1231		default:
1232			tll->tll_style = PTS_NONE;
1233			tll->tll_alen = 0;
1234			rc = EINVAL;
1235			break;
1236		}
1237		break;
1238
1239	case PPPTUN_GNNAME:
1240		/* This can be done on either side. */
1241		if (mp->b_cont == NULL || iop->ioc_count < sizeof (uint32_t)) {
1242			rc = EINVAL;
1243			break;
1244		}
1245		zoneid = tcl == NULL ? tll->tll_zoneid : tcl->tcl_zoneid;
1246		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1247		i = ptn->ptn_index;
1248		tll = TO_TLL(tunll_list.q_forw);
1249		while (tll != TO_TLL(&tunll_list)) {
1250			if (tll->tll_zoneid == zoneid && --i < 0)
1251				break;
1252			tll = TO_TLL(tll->tll_next);
1253		}
1254		if (tll != TO_TLL(&tunll_list)) {
1255			bcopy(tll->tll_name, ptn->ptn_name,
1256			    sizeof (ptn->ptn_name));
1257		} else {
1258			bzero(ptn, sizeof (*ptn));
1259		}
1260		len = sizeof (*ptn);
1261		break;
1262
1263	case PPPTUN_LCLADDR:
1264		/* This is done on the *module* (lower level) side. */
1265		if (tll == NULL || mp->b_cont == NULL) {
1266			rc = EINVAL;
1267			break;
1268		}
1269
1270		pap = &tll->tll_lcladdr;
1271		len = tll->tll_alen;
1272		if (len == 0 || len > iop->ioc_count) {
1273			rc = EINVAL;
1274			break;
1275		}
1276		bcopy(mp->b_cont->b_rptr, pap, len);
1277		len = 0;
1278		break;
1279
1280	case PPPTUN_SPEER:
1281		/* Client (device) side only; before SDATA */
1282		if (tcl == NULL || mp->b_cont == NULL ||
1283		    iop->ioc_count != sizeof (*ptp)) {
1284			rc = EINVAL;
1285			break;
1286		}
1287		if (tcl->tcl_data_tll != NULL) {
1288			rc = EINVAL;
1289			break;
1290		}
1291		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1292		DTRACE_PROBE2(sppptun__speer, tuncl_t *, tcl,
1293		    struct ppptun_peer *, ptp);
1294		/* Once set, the style cannot change. */
1295		if (tcl->tcl_style != PTS_NONE &&
1296		    tcl->tcl_style != ptp->ptp_style) {
1297			rc = EINVAL;
1298			break;
1299		}
1300		if (ptp->ptp_flags & PTPF_DAEMON) {
1301			/* User requests registration for tunnel 0 */
1302			if ((tcl->tcl_flags & TCLF_SPEER_DONE) ||
1303			    ptp->ptp_ltunid != 0 || ptp->ptp_rtunid != 0 ||
1304			    ptp->ptp_lsessid != 0 || ptp->ptp_rsessid != 0) {
1305				rc = EINVAL;
1306				break;
1307			}
1308			tcl->tcl_flags |= TCLF_DAEMON;
1309		} else {
1310			/* Normal client connection */
1311			if (tcl->tcl_flags & TCLF_DAEMON) {
1312				rc = EINVAL;
1313				break;
1314			}
1315			if (ptp->ptp_lsessid != 0 &&
1316			    ptp->ptp_lsessid != tcl->tcl_lsessid) {
1317				rc = EINVAL;
1318				break;
1319			}
1320			/*
1321			 * If we're reassigning the peer data, then
1322			 * the previous assignment must have been for
1323			 * a client control connection.  Check that.
1324			 */
1325			if ((tcl->tcl_flags & TCLF_SPEER_DONE) &&
1326			    ((tcl->tcl_ltunid != 0 &&
1327			    tcl->tcl_ltunid != ptp->ptp_ltunid) ||
1328			    (tcl->tcl_rtunid != 0 &&
1329			    tcl->tcl_rtunid != ptp->ptp_rtunid) ||
1330			    (tcl->tcl_rsessid != 0 &&
1331			    tcl->tcl_rsessid != ptp->ptp_rsessid))) {
1332				rc = EINVAL;
1333				break;
1334			}
1335			if ((tcl->tcl_ltunid = ptp->ptp_ltunid) == 0 &&
1336			    tcl->tcl_style == PTS_L2FTP)
1337				tcl->tcl_ltunid = ptp->ptp_lsessid;
1338			tcl->tcl_rtunid = ptp->ptp_rtunid;
1339			tcl->tcl_rsessid = ptp->ptp_rsessid;
1340		}
1341		tcl->tcl_flags |= TCLF_SPEER_DONE;
1342		tcl->tcl_style = ptp->ptp_style;
1343		tcl->tcl_address = ptp->ptp_address;
1344		goto fill_in_peer;
1345
1346	case PPPTUN_GPEER:
1347		/* Client (device) side only */
1348		if (tcl == NULL) {
1349			rc = EINVAL;
1350			break;
1351		}
1352		if (mp->b_cont != NULL)
1353			freemsg(mp->b_cont);
1354		mp->b_cont = allocb(sizeof (*ptp), BPRI_HI);
1355		if (mp->b_cont == NULL) {
1356			rc = ENOSR;
1357			break;
1358		}
1359		ptp = (struct ppptun_peer *)mp->b_cont->b_rptr;
1360	fill_in_peer:
1361		ptp->ptp_style = tcl->tcl_style;
1362		ptp->ptp_flags = (tcl->tcl_flags & TCLF_DAEMON) ? PTPF_DAEMON :
1363		    0;
1364		ptp->ptp_ltunid = tcl->tcl_ltunid;
1365		ptp->ptp_rtunid = tcl->tcl_rtunid;
1366		ptp->ptp_lsessid = tcl->tcl_lsessid;
1367		ptp->ptp_rsessid = tcl->tcl_rsessid;
1368		ptp->ptp_address = tcl->tcl_address;
1369		len = sizeof (*ptp);
1370		break;
1371
1372	case PPPTUN_SDATA:
1373	case PPPTUN_SCTL:
1374		/* Client (device) side only; must do SPEER first */
1375		if (tcl == NULL || mp->b_cont == NULL ||
1376		    iop->ioc_count != sizeof (*ptn) ||
1377		    *mp->b_cont->b_rptr == '\0') {
1378			rc = EINVAL;
1379			break;
1380		}
1381		if (!(tcl->tcl_flags & TCLF_SPEER_DONE)) {
1382			rc = EINVAL;
1383			break;
1384		}
1385		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1386		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1387		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1388		if (tll == NULL) {
1389			rc = ESRCH;
1390			break;
1391		}
1392		if (tll->tll_style != tcl->tcl_style) {
1393			rc = ENXIO;
1394			break;
1395		}
1396		if (iop->ioc_cmd == PPPTUN_SDATA) {
1397			if (tcl->tcl_data_tll != NULL) {
1398				rc = EEXIST;
1399				break;
1400			}
1401			/* server daemons cannot use regular data */
1402			if (tcl->tcl_flags & TCLF_DAEMON) {
1403				rc = EINVAL;
1404				break;
1405			}
1406			tcl->tcl_data_tll = tll;
1407		} else if (tcl->tcl_flags & TCLF_DAEMON) {
1408			if (tll->tll_defcl != NULL && tll->tll_defcl != tcl) {
1409				rc = EEXIST;
1410				break;
1411			}
1412			tll->tll_defcl = tcl;
1413			if (tcl->tcl_ctrl_tll != NULL) {
1414				KDECR(tcl->tcl_ctrl_tll, tll_kstats,
1415				    lks_clients);
1416			}
1417			tcl->tcl_ctrl_tll = tll;
1418		} else {
1419			if (tcl->tcl_ctrl_tll != NULL) {
1420				rc = EEXIST;
1421				break;
1422			}
1423			tcl->tcl_ctrl_tll = tll;
1424		}
1425		KLINCR(lks_clients);
1426		break;
1427
1428	case PPPTUN_GDATA:
1429	case PPPTUN_GCTL:
1430		/* Client (device) side only */
1431		if (tcl == NULL) {
1432			rc = EINVAL;
1433			break;
1434		}
1435		if (mp->b_cont != NULL)
1436			freemsg(mp->b_cont);
1437		mp->b_cont = allocb(sizeof (*ptn), BPRI_HI);
1438		if (mp->b_cont == NULL) {
1439			rc = ENOSR;
1440			break;
1441		}
1442		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1443		if (iop->ioc_cmd == PPPTUN_GDATA)
1444			tll = tcl->tcl_data_tll;
1445		else
1446			tll = tcl->tcl_ctrl_tll;
1447		if (tll == NULL)
1448			bzero(ptn, sizeof (*ptn));
1449		else
1450			bcopy(tll->tll_name, ptn->ptn_name,
1451			    sizeof (ptn->ptn_name));
1452		len = sizeof (*ptn);
1453		break;
1454
1455	case PPPTUN_DCTL:
1456		/* Client (device) side daemon mode only */
1457		if (tcl == NULL || mp->b_cont == NULL ||
1458		    iop->ioc_count != sizeof (*ptn) ||
1459		    !(tcl->tcl_flags & TCLF_DAEMON)) {
1460			rc = EINVAL;
1461			break;
1462		}
1463		ptn = (union ppptun_name *)mp->b_cont->b_rptr;
1464		ptn->ptn_name[sizeof (ptn->ptn_name) - 1] = '\0';
1465		tll = tll_lookup_on_name(ptn->ptn_name, tcl->tcl_zoneid);
1466		if (tll == NULL || tll->tll_defcl != tcl) {
1467			rc = ESRCH;
1468			break;
1469		}
1470		tll->tll_defcl = NULL;
1471		break;
1472
1473	case PPPTUN_SSAP:
1474		/* This is done on the *module* (lower level) side. */
1475		if (tll == NULL || mp->b_cont == NULL ||
1476		    iop->ioc_count != sizeof (uint_t)) {
1477			rc = EINVAL;
1478			break;
1479		}
1480
1481		tll->tll_sap = *(uint_t *)mp->b_cont->b_rptr;
1482		break;
1483
1484	default:
1485		/* Caller should already have checked command value */
1486		ASSERT(0);
1487	}
1488	if (rc != 0) {
1489		miocnak(q, mp, 0, rc);
1490	} else {
1491		if (len > 0)
1492			mp->b_cont->b_wptr = mp->b_cont->b_rptr + len;
1493		miocack(q, mp, len, 0);
1494	}
1495}
1496
1497/*
1498 * sppptun_ioctl()
1499 *
1500 * MT-Perimeters:
1501 *    shared inner, shared outer.
1502 *
1503 * Description:
1504 *    Called by sppptun_uwput as the result of receiving a M_IOCTL command.
1505 */
1506static void
1507sppptun_ioctl(queue_t *q, mblk_t *mp)
1508{
1509	struct iocblk *iop;
1510	int rc = 0;
1511	int len = 0;
1512	uint32_t val = 0;
1513	tunll_t *tll;
1514
1515	iop = (struct iocblk *)mp->b_rptr;
1516
1517	switch (iop->ioc_cmd) {
1518	case PPPIO_DEBUG:
1519	case PPPIO_GETSTAT:
1520	case PPPIO_GETSTAT64:
1521	case PPPTUN_SNAME:
1522	case PPPTUN_SINFO:
1523	case PPPTUN_GINFO:
1524	case PPPTUN_GNNAME:
1525	case PPPTUN_LCLADDR:
1526	case PPPTUN_SPEER:
1527	case PPPTUN_GPEER:
1528	case PPPTUN_SDATA:
1529	case PPPTUN_GDATA:
1530	case PPPTUN_SCTL:
1531	case PPPTUN_GCTL:
1532	case PPPTUN_DCTL:
1533	case PPPTUN_SSAP:
1534		qwriter(q, mp, sppptun_inner_ioctl, PERIM_INNER);
1535		return;
1536
1537	case PPPIO_GCLEAN:	/* always clean */
1538		val = RCV_B7_1 | RCV_B7_0 | RCV_ODDP | RCV_EVNP;
1539		len = sizeof (uint32_t);
1540		break;
1541
1542	case PPPIO_GTYPE:	/* we look like an async driver. */
1543		val = PPPTYP_AHDLC;
1544		len = sizeof (uint32_t);
1545		break;
1546
1547	case PPPIO_CFLAGS:	/* never compress headers */
1548		val = 0;
1549		len = sizeof (uint32_t);
1550		break;
1551
1552		/* quietly ack PPP things we don't need to do. */
1553	case PPPIO_XFCS:
1554	case PPPIO_RFCS:
1555	case PPPIO_XACCM:
1556	case PPPIO_RACCM:
1557	case PPPIO_LASTMOD:
1558	case PPPIO_MUX:
1559	case I_PLINK:
1560	case I_PUNLINK:
1561	case I_LINK:
1562	case I_UNLINK:
1563		break;
1564
1565	default:
1566		tll = (tunll_t *)q->q_ptr;
1567		if (!(tll->tll_flags & TLLF_NOTLOWER)) {
1568			/* module side; pass this through. */
1569			putnext(q, mp);
1570			return;
1571		}
1572		rc = EINVAL;
1573		break;
1574	}
1575	if (rc == 0 && len == sizeof (uint32_t)) {
1576		if (mp->b_cont != NULL)
1577			freemsg(mp->b_cont);
1578		mp->b_cont = allocb(sizeof (uint32_t), BPRI_HI);
1579		if (mp->b_cont == NULL) {
1580			rc = ENOSR;
1581		} else {
1582			*(uint32_t *)mp->b_cont->b_wptr = val;
1583			mp->b_cont->b_wptr += sizeof (uint32_t);
1584		}
1585	}
1586	if (rc == 0) {
1587		miocack(q, mp, len, 0);
1588	} else {
1589		miocnak(q, mp, 0, rc);
1590	}
1591}
1592
1593/*
1594 * sppptun_inner_mctl()
1595 *
1596 * MT-Perimeters:
1597 *    exclusive inner, shared outer.
1598 *
1599 * Description:
1600 *    Called by qwriter (via sppptun_uwput) as the result of receiving
1601 *    an M_CTL.  Called only on the client (driver) side.
1602 */
1603static void
1604sppptun_inner_mctl(queue_t *q, mblk_t *mp)
1605{
1606	int msglen;
1607	tuncl_t *tcl;
1608
1609	tcl = q->q_ptr;
1610
1611	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1612		freemsg(mp);
1613		return;
1614	}
1615
1616	msglen = MBLKL(mp);
1617	switch (*mp->b_rptr) {
1618	case PPPCTL_UNIT:
1619		if (msglen == 2)
1620			tcl->tcl_unit = mp->b_rptr[1];
1621		else if (msglen == 8)
1622			tcl->tcl_unit = ((uint32_t *)mp->b_rptr)[1];
1623		break;
1624	}
1625	freemsg(mp);
1626}
1627
1628/*
1629 * sppptun_uwput()
1630 *
1631 * MT-Perimeters:
1632 *    shared inner, shared outer.
1633 *
1634 * Description:
1635 *	Regular output data and controls pass through here.
1636 */
1637static void
1638sppptun_uwput(queue_t *q, mblk_t *mp)
1639{
1640	queue_t *nextq;
1641	tuncl_t *tcl;
1642
1643	ASSERT(q->q_ptr != NULL);
1644
1645	switch (MTYPE(mp)) {
1646	case M_DATA:
1647	case M_PROTO:
1648	case M_PCPROTO:
1649		if (q->q_first == NULL &&
1650		    (nextq = sppptun_outpkt(q, &mp)) != NULL) {
1651			putnext(nextq, mp);
1652		} else if (mp != NULL && !putq(q, mp)) {
1653			freemsg(mp);
1654		}
1655		break;
1656	case M_IOCTL:
1657		sppptun_ioctl(q, mp);
1658		break;
1659	case M_CTL:
1660		qwriter(q, mp, sppptun_inner_mctl, PERIM_INNER);
1661		break;
1662	default:
1663		tcl = (tuncl_t *)q->q_ptr;
1664		/*
1665		 * If we're the driver, then discard unknown junk.
1666		 * Otherwise, if we're the module, then forward along.
1667		 */
1668		if (tcl->tcl_flags & TCLF_ISCLIENT)
1669			freemsg(mp);
1670		else
1671			putnext(q, mp);
1672		break;
1673	}
1674}
1675
1676/*
1677 * Send a DLPI/TPI control message to the driver but make sure there
1678 * is only one outstanding message.  Uses tll_msg_pending to tell when
1679 * it must queue.  sppptun_urput calls message_done() when an ACK or a
1680 * NAK is received to process the next queued message.
1681 */
1682static void
1683message_send(tunll_t *tll, mblk_t *mp)
1684{
1685	mblk_t **mpp;
1686
1687	if (tll->tll_msg_pending) {
1688		/* Must queue message. Tail insertion */
1689		mpp = &tll->tll_msg_deferred;
1690		while (*mpp != NULL)
1691			mpp = &((*mpp)->b_next);
1692		*mpp = mp;
1693		return;
1694	}
1695	tll->tll_msg_pending = 1;
1696	putnext(tll->tll_wq, mp);
1697}
1698
1699/*
1700 * Called when an DLPI/TPI control message has been acked or nacked to
1701 * send down the next queued message (if any).
1702 */
1703static void
1704message_done(tunll_t *tll)
1705{
1706	mblk_t *mp;
1707
1708	ASSERT(tll->tll_msg_pending);
1709	tll->tll_msg_pending = 0;
1710	mp = tll->tll_msg_deferred;
1711	if (mp != NULL) {
1712		tll->tll_msg_deferred = mp->b_next;
1713		mp->b_next = NULL;
1714		tll->tll_msg_pending = 1;
1715		putnext(tll->tll_wq, mp);
1716	}
1717}
1718
1719/*
1720 * Send down queued "close" messages to lower stream.  These were
1721 * enqueued right after the stream was originally allocated, when the
1722 * tll_style was set by PPPTUN_SINFO.
1723 */
1724static int
1725tll_close_req(tunll_t *tll)
1726{
1727	mblk_t *mb, *mbnext;
1728
1729	if ((mb = tll->tll_onclose) == NULL)
1730		tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1731	else {
1732		tll->tll_onclose = NULL;
1733		while (mb != NULL) {
1734			mbnext = mb->b_next;
1735			mb->b_next = NULL;
1736			message_send(tll, mb);
1737			mb = mbnext;
1738		}
1739	}
1740	return (0);
1741}
1742
1743/*
1744 * This function is called when a backenable occurs on the write side of a
1745 * lower stream.  It walks over the client streams, looking for ones that use
1746 * the given tunll_t lower stream.  Each client is then backenabled.
1747 */
1748static void
1749tclvm_backenable(void *arg, void *firstv, size_t numv)
1750{
1751	tunll_t *tll = arg;
1752	int minorn = (int)(uintptr_t)firstv;
1753	int minormax = minorn + numv;
1754	tuncl_t *tcl;
1755	queue_t *q;
1756
1757	while (minorn < minormax) {
1758		tcl = tcl_slots[minorn - 1];
1759		if ((tcl->tcl_data_tll == tll ||
1760		    tcl->tcl_ctrl_tll == tll) &&
1761		    (q = tcl->tcl_rq) != NULL) {
1762			qenable(OTHERQ(q));
1763		}
1764		minorn++;
1765	}
1766}
1767
1768/*
1769 * sppptun_uwsrv()
1770 *
1771 * MT-Perimeters:
1772 *    exclusive inner, shared outer.
1773 *
1774 * Description:
1775 *    Upper write-side service procedure.  In addition to the usual
1776 *    STREAMS queue service handling, this routine also handles the
1777 *    transmission of the unbind/detach messages to the lower stream
1778 *    driver when a lower stream is being closed.  (See the use of
1779 *    qenable/qwait in sppptun_close().)
1780 */
1781static int
1782sppptun_uwsrv(queue_t *q)
1783{
1784	tuncl_t	*tcl;
1785	mblk_t *mp;
1786	queue_t *nextq;
1787
1788	tcl = q->q_ptr;
1789	if (!(tcl->tcl_flags & TCLF_ISCLIENT)) {
1790		tunll_t *tll = (tunll_t *)tcl;
1791
1792		if ((tll->tll_flags & (TLLF_CLOSING|TLLF_CLOSE_DONE)) ==
1793		    TLLF_CLOSING) {
1794			tll->tll_error = tll_close_req(tll);
1795			tll->tll_flags |= TLLF_CLOSE_DONE;
1796		} else {
1797			/*
1798			 * We've been enabled here because of a backenable on
1799			 * output flow control.  Backenable clients using this
1800			 * lower layer.
1801			 */
1802			vmem_walk(tcl_minor_arena, VMEM_ALLOC, tclvm_backenable,
1803			    tll);
1804		}
1805		return (0);
1806	}
1807
1808	while ((mp = getq(q)) != NULL) {
1809		if ((nextq = sppptun_outpkt(q, &mp)) != NULL) {
1810			putnext(nextq, mp);
1811		} else if (mp != NULL) {
1812			(void) putbq(q, mp);
1813			break;
1814		}
1815	}
1816	return (0);
1817}
1818
1819/*
1820 * sppptun_lwput()
1821 *
1822 * MT-Perimeters:
1823 *    shared inner, shared outer.
1824 *
1825 * Description:
1826 *    Lower write-side put procedure.  Nothing should be sending
1827 *    packets down this stream.
1828 */
1829static void
1830sppptun_lwput(queue_t *q, mblk_t *mp)
1831{
1832	switch (MTYPE(mp)) {
1833	case M_PROTO:
1834		putnext(q, mp);
1835		break;
1836	default:
1837		freemsg(mp);
1838		break;
1839	}
1840}
1841
1842/*
1843 * sppptun_lrput()
1844 *
1845 * MT-Perimeters:
1846 *    shared inner, shared outer.
1847 *
1848 * Description:
1849 *    Lower read-side put procedure.  Nothing should arrive here.
1850 */
1851static void
1852sppptun_lrput(queue_t *q, mblk_t *mp)
1853{
1854	tuncl_t *tcl;
1855
1856	switch (MTYPE(mp)) {
1857	case M_IOCTL:
1858		miocnak(q, mp, 0, EINVAL);
1859		return;
1860	case M_FLUSH:
1861		if (*mp->b_rptr & FLUSHR) {
1862			flushq(q, FLUSHDATA);
1863		}
1864		if (*mp->b_rptr & FLUSHW) {
1865			*mp->b_rptr &= ~FLUSHR;
1866			qreply(q, mp);
1867		} else {
1868			freemsg(mp);
1869		}
1870		return;
1871	}
1872	/*
1873	 * Try to forward the message to the put procedure for the upper
1874	 * control stream for this lower stream. If there are already messages
1875	 * queued here, queue this one up to preserve message ordering.
1876	 */
1877	if ((tcl = (tuncl_t *)q->q_ptr) == NULL || tcl->tcl_rq == NULL) {
1878		freemsg(mp);
1879		return;
1880	}
1881	if (queclass(mp) == QPCTL ||
1882	    (q->q_first == NULL && canput(tcl->tcl_rq))) {
1883		put(tcl->tcl_rq, mp);
1884	} else {
1885		if (!putq(q, mp))
1886			freemsg(mp);
1887	}
1888}
1889
1890/*
1891 * MT-Perimeters:
1892 *    shared inner, shared outer.
1893 *
1894 *    Handle non-data DLPI messages.  Used with PPPoE, which runs over
1895 *    Ethernet only.
1896 */
1897static void
1898urput_dlpi(queue_t *q, mblk_t *mp)
1899{
1900	int err;
1901	union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
1902	tunll_t *tll = q->q_ptr;
1903	size_t mlen = MBLKL(mp);
1904
1905	switch (dlp->dl_primitive) {
1906	case DL_UDERROR_IND:
1907		break;
1908
1909	case DL_ERROR_ACK:
1910		if (mlen < DL_ERROR_ACK_SIZE)
1911			break;
1912		err = dlp->error_ack.dl_unix_errno ?
1913		    dlp->error_ack.dl_unix_errno : ENXIO;
1914		switch (dlp->error_ack.dl_error_primitive) {
1915		case DL_UNBIND_REQ:
1916			message_done(tll);
1917			break;
1918		case DL_DETACH_REQ:
1919			message_done(tll);
1920			tll->tll_error = err;
1921			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1922			break;
1923		case DL_PHYS_ADDR_REQ:
1924			message_done(tll);
1925			break;
1926		case DL_INFO_REQ:
1927		case DL_ATTACH_REQ:
1928		case DL_BIND_REQ:
1929			message_done(tll);
1930			tll->tll_error = err;
1931			break;
1932		}
1933		break;
1934
1935	case DL_INFO_ACK:
1936		message_done(tll);
1937		break;
1938
1939	case DL_BIND_ACK:
1940		message_done(tll);
1941		break;
1942
1943	case DL_PHYS_ADDR_ACK:
1944		break;
1945
1946	case DL_OK_ACK:
1947		if (mlen < DL_OK_ACK_SIZE)
1948			break;
1949		switch (dlp->ok_ack.dl_correct_primitive) {
1950		case DL_UNBIND_REQ:
1951			message_done(tll);
1952			break;
1953		case DL_DETACH_REQ:
1954			tll->tll_flags |= TLLF_SHUTDOWN_DONE;
1955			break;
1956		case DL_ATTACH_REQ:
1957			message_done(tll);
1958			break;
1959		}
1960		break;
1961	}
1962	freemsg(mp);
1963}
1964
/*
 * Search structure used with PPPoE only; see tclvm_pppoe_search().
 *
 * sppptun_recv() fills in the search keys and clears "tcl" before
 * walking the allocated minor numbers; tclvm_pppoe_search() sets
 * "tcl" to the first client that matches all of the keys.
 */
struct poedat {
	uint_t sessid;		/* key: compared with tcl_rsessid */
	tunll_t *tll;		/* key: lower stream the packet came in on */
	const void *srcaddr;	/* key: compared with the client's PPPoE MAC */
	int isdata;		/* key: match tcl_data_tll if set, else ctrl */
	tuncl_t *tcl;		/* result: matching client; NULL until found */
};
1973
1974/*
1975 * This function is called by vmem_walk from within sppptun_recv.  It
1976 * iterates over a span of allocated minor node numbers to search for
1977 * the appropriate lower stream, session ID, and peer MAC address.
1978 *
1979 * (This is necessary due to a design flaw in the PPPoE protocol
1980 * itself.  The protocol assigns session IDs from the server side
1981 * only.  Both server and client use the same number.  Thus, if there
1982 * are multiple clients on a single host, there can be session ID
1983 * conflicts between servers and there's no way to detangle them
1984 * except by looking at the remote MAC address.)
1985 *
1986 * (This could have been handled by linking together sessions that
1987 * differ only in the remote MAC address.  This isn't done because it
1988 * would involve extra per-session storage and it's very unlikely that
1989 * PPPoE would be used this way.)
1990 */
1991static void
1992tclvm_pppoe_search(void *arg, void *firstv, size_t numv)
1993{
1994	struct poedat *poedat = (struct poedat *)arg;
1995	int minorn = (int)(uintptr_t)firstv;
1996	int minormax = minorn + numv;
1997	tuncl_t *tcl;
1998
1999	if (poedat->tcl != NULL)
2000		return;
2001	while (minorn < minormax) {
2002		tcl = tcl_slots[minorn - 1];
2003		ASSERT(tcl != NULL);
2004		if (tcl->tcl_rsessid == poedat->sessid &&
2005		    ((!poedat->isdata && tcl->tcl_ctrl_tll == poedat->tll) ||
2006		    (poedat->isdata && tcl->tcl_data_tll == poedat->tll)) &&
2007		    bcmp(tcl->tcl_address.pta_pppoe.ptma_mac,
2008		    poedat->srcaddr,
2009		    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) == 0) {
2010			poedat->tcl = tcl;
2011			break;
2012		}
2013		minorn++;
2014	}
2015}
2016
2017/*
2018 * sppptun_recv()
2019 *
2020 * MT-Perimeters:
2021 *    shared inner, shared outer.
2022 *
2023 * Description:
2024 *    Receive function called by sppptun_urput, which is called when
2025 *    the lower read-side put or service procedure sends a message
2026 *    upstream to the a device user (PPP).  It attempts to find an
2027 *    appropriate queue on the module above us (depending on what the
2028 *    associated upper stream for the protocol would be), and if not
2029 *    possible, it will find an upper control stream for the protocol.
2030 *    Returns a pointer to the upper queue_t, or NULL if the message
2031 *    has been discarded.
2032 *
2033 * About demultiplexing:
2034 *
2035 *	All four protocols (L2F, PPTP, L2TP, and PPPoE) support a
2036 *	locally assigned ID for demultiplexing incoming traffic.  For
2037 *	L2F, this is called the Client ID, for PPTP the Call ID, for
2038 *	L2TP the Session ID, and for PPPoE the SESSION_ID.  This is a
2039 *	16 bit number for all four protocols, and is used to directly
2040 *	index into a list of upper streams.  With the upper stream in
2041 *	hand, we verify that this is the right stream and deliver the
2042 *	data.
2043 *
2044 *	L2TP has a Tunnel ID, which represents a bundle of PPP
2045 *	sessions between the peers.  Because we always assign unique
2046 *	session ID numbers, we merely check that the given ID matches
2047 *	the assigned ID for the upper stream.
2048 *
2049 *	L2F has a Multiplex ID, which is unique per connection.  It
2050 *	does not have L2TP's concept of multiple-connections-within-
2051 *	a-tunnel.  The same checking is done.
2052 *
2053 *	PPPoE is a horribly broken protocol.  Only one ID is assigned
2054 *	per connection.  The client must somehow demultiplex based on
2055 *	an ID number assigned by the server.  It's not necessarily
2056 *	unique.  The search is done based on {ID,peerEthernet} (using
2057 *	tcl_rsessid) for all packet types except PADI and PADS.
2058 *
2059 *	Neither PPPoE nor PPTP supports additional ID numbers.
2060 *
2061 *	Both L2F and L2TP come in over UDP.  They are distinguished by
2062 *	looking at the GRE version field -- 001 for L2F and 010 for
2063 *	L2TP.
2064 */
2065static queue_t *
2066sppptun_recv(queue_t *q, mblk_t **mpp, const void *srcaddr)
2067{
2068	mblk_t *mp;
2069	tunll_t *tll;
2070	tuncl_t *tcl;
2071	int sessid;
2072	int remlen;
2073	int msglen;
2074	int isdata;
2075	int i;
2076	const uchar_t *ucp;
2077	const poep_t *poep;
2078	mblk_t *mnew;
2079	ppptun_atype *pap;
2080
2081	mp = *mpp;
2082
2083	tll = q->q_ptr;
2084	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2085
2086	tcl = NULL;
2087	switch (tll->tll_style) {
2088	case PTS_PPPOE:
2089		/* Note that poep_t alignment is uint16_t */
2090		if ((!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
2091		    MBLKL(mp) < sizeof (poep_t)) &&
2092		    !pullupmsg(mp, sizeof (poep_t)))
2093			break;
2094		poep = (const poep_t *)mp->b_rptr;
2095		if (poep->poep_version_type != POE_VERSION)
2096			break;
2097		/*
2098		 * First, extract a session ID number.  All protocols have
2099		 * this.
2100		 */
2101		isdata = (poep->poep_code == POECODE_DATA);
2102		sessid = ntohs(poep->poep_session_id);
2103		remlen = sizeof (*poep);
2104		msglen = ntohs(poep->poep_length);
2105		i = poep->poep_code;
2106		if (i == POECODE_PADI || i == POECODE_PADR) {
2107			/* These go to the server daemon only. */
2108			tcl = tll->tll_defcl;
2109		} else if (i == POECODE_PADO || i == POECODE_PADS) {
2110			/*
2111			 * These go to a client only, and are demuxed
2112			 * by the Host-Uniq field (into which we stuff
2113			 * our local ID number when generating
2114			 * PADI/PADR).
2115			 */
2116			ucp = (const uchar_t *)(poep + 1);
2117			i = msglen;
2118			while (i > POET_HDRLEN) {
2119				if (POET_GET_TYPE(ucp) == POETT_END) {
2120					i = 0;
2121					break;
2122				}
2123				if (POET_GET_TYPE(ucp) == POETT_UNIQ &&
2124				    POET_GET_LENG(ucp) >= sizeof (uint32_t))
2125					break;
2126				i -= POET_GET_LENG(ucp) + POET_HDRLEN;
2127				ucp = POET_NEXT(ucp);
2128			}
2129			if (i >= POET_HDRLEN + 4)
2130				sessid = GETLONG(ucp + POET_HDRLEN);
2131			tcl = tcl_by_minor((minor_t)sessid);
2132		} else {
2133			/*
2134			 * Try minor number as session ID first, since
2135			 * it's used that way on server side.  It's
2136			 * not used that way on the client, though, so
2137			 * this might not work.  If this isn't the
2138			 * right one, then try the tll cache.  If
2139			 * neither is right, then search all open
2140			 * clients.  Did I mention that the PPPoE
2141			 * protocol is badly designed?
2142			 */
2143			tcl = tcl_by_minor((minor_t)sessid);
2144			if (tcl == NULL ||
2145			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2146			    (isdata && tcl->tcl_data_tll != tll) ||
2147			    sessid != tcl->tcl_rsessid ||
2148			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2149			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2150				tcl = tll->tll_lastcl;
2151			if (tcl == NULL ||
2152			    (!isdata && tcl->tcl_ctrl_tll != tll) ||
2153			    (isdata && tcl->tcl_data_tll != tll) ||
2154			    sessid != tcl->tcl_rsessid ||
2155			    bcmp(srcaddr, tcl->tcl_address.pta_pppoe.ptma_mac,
2156			    sizeof (tcl->tcl_address.pta_pppoe.ptma_mac)) != 0)
2157				tcl = NULL;
2158			if (tcl == NULL && sessid != 0) {
2159				struct poedat poedat;
2160
2161				/*
2162				 * Slow mode.  Too bad.  If you don't like it,
2163				 * you can always choose a better protocol.
2164				 */
2165				poedat.sessid = sessid;
2166				poedat.tll = tll;
2167				poedat.srcaddr = srcaddr;
2168				poedat.tcl = NULL;
2169				poedat.isdata = isdata;
2170				vmem_walk(tcl_minor_arena, VMEM_ALLOC,
2171				    tclvm_pppoe_search, &poedat);
2172				KLINCR(lks_walks);
2173				if ((tcl = poedat.tcl) != NULL) {
2174					tll->tll_lastcl = tcl;
2175					KCINCR(cks_walks);
2176				}
2177			}
2178		}
2179		break;
2180	}
2181
2182	if (tcl == NULL || tcl->tcl_rq == NULL) {
2183		DTRACE_PROBE3(sppptun__recv__discard, int, sessid,
2184		    tuncl_t *, tcl, mblk_t *, mp);
2185		if (tcl == NULL) {
2186			KLINCR(lks_in_nomatch);
2187		}
2188		if (isdata) {
2189			KLINCR(lks_indata_drops);
2190			if (tcl != NULL)
2191				tcl->tcl_stats.ppp_ierrors++;
2192		} else {
2193			KLINCR(lks_inctrl_drops);
2194			if (tcl != NULL) {
2195				KCINCR(cks_inctrl_drops);
2196			}
2197		}
2198		freemsg(mp);
2199		return (NULL);
2200	}
2201
2202	if (tcl->tcl_data_tll == tll && isdata) {
2203		if (!adjmsg(mp, remlen) ||
2204		    (i = msgsize(mp)) < msglen ||
2205		    (i > msglen && !adjmsg(mp, msglen - i))) {
2206			KLINCR(lks_indata_drops);
2207			tcl->tcl_stats.ppp_ierrors++;
2208			freemsg(mp);
2209			return (NULL);
2210		}
2211		/* XXX -- address/control handling in pppd needs help. */
2212		if (*mp->b_rptr != 0xFF) {
2213			if ((mp = prependb(mp, 2, 1)) == NULL) {
2214				KLINCR(lks_indata_drops);
2215				tcl->tcl_stats.ppp_ierrors++;
2216				return (NULL);
2217			}
2218			mp->b_rptr[0] = 0xFF;
2219			mp->b_rptr[1] = 0x03;
2220		}
2221		MTYPE(mp) = M_DATA;
2222		tcl->tcl_stats.ppp_ibytes += msgsize(mp);
2223		tcl->tcl_stats.ppp_ipackets++;
2224		KLINCR(lks_indata);
2225	} else {
2226		if (isdata || tcl->tcl_ctrl_tll != tll ||
2227		    (mnew = make_control(tcl, tll, PTCA_CONTROL, tcl)) ==
2228		    NULL) {
2229			KLINCR(lks_inctrl_drops);
2230			KCINCR(cks_inctrl_drops);
2231			freemsg(mp);
2232			return (NULL);
2233		}
2234		/* Fix up source address; peer might not be set yet. */
2235		pap = &((struct ppptun_control *)mnew->b_rptr)->ptc_address;
2236		bcopy(srcaddr, pap->pta_pppoe.ptma_mac,
2237		    sizeof (pap->pta_pppoe.ptma_mac));
2238		mnew->b_cont = mp;
2239		mp = mnew;
2240		KLINCR(lks_inctrls);
2241		KCINCR(cks_inctrls);
2242	}
2243	*mpp = mp;
2244	return (tcl->tcl_rq);
2245}
2246
2247/*
2248 * sppptun_urput()
2249 *
2250 * MT-Perimeters:
2251 *    shared inner, shared outer.
2252 *
2253 * Description:
2254 *    Upper read-side put procedure.  Messages from the underlying
2255 *    lower stream driver arrive here.  See sppptun_recv for the
2256 *    demultiplexing logic.
2257 */
2258static void
2259sppptun_urput(queue_t *q, mblk_t *mp)
2260{
2261	union DL_primitives *dlprim;
2262	mblk_t *mpnext;
2263	tunll_t *tll;
2264	queue_t *nextq;
2265
2266	tll = q->q_ptr;
2267	ASSERT(!(tll->tll_flags & TLLF_NOTLOWER));
2268
2269	switch (MTYPE(mp)) {
2270	case M_DATA:
2271		/*
2272		 * When we're bound over IP, data arrives here.  The
2273		 * packet starts with the IP header itself.
2274		 */
2275		if ((nextq = sppptun_recv(q, &mp, NULL)) != NULL)
2276			putnext(nextq, mp);
2277		break;
2278
2279	case M_PROTO:
2280	case M_PCPROTO:
2281		/* Data arrives here for UDP or raw Ethernet, not IP. */
2282		switch (tll->tll_style) {
2283			/* PPTP control messages are over TCP only. */
2284		case PTS_PPTP:
2285		default:
2286			ASSERT(0);	/* how'd that happen? */
2287			break;
2288
2289		case PTS_PPPOE:		/* DLPI message */
2290			if (MBLKL(mp) < sizeof (t_uscalar_t))
2291				break;
2292			dlprim = (union DL_primitives *)mp->b_rptr;
2293			switch (dlprim->dl_primitive) {
2294			case DL_UNITDATA_IND: {
2295				size_t mlen = MBLKL(mp);
2296
2297				if (mlen < DL_UNITDATA_IND_SIZE)
2298					break;
2299				if (dlprim->unitdata_ind.dl_src_addr_offset <
2300				    DL_UNITDATA_IND_SIZE ||
2301				    dlprim->unitdata_ind.dl_src_addr_offset +
2302				    dlprim->unitdata_ind.dl_src_addr_length >
2303				    mlen)
2304					break;
2305			}
2306				/* FALLTHROUGH */
2307			case DL_UNITDATA_REQ:	/* For loopback support. */
2308				if (dlprim->dl_primitive == DL_UNITDATA_REQ &&
2309				    MBLKL(mp) < DL_UNITDATA_REQ_SIZE)
2310					break;
2311				if ((mpnext = mp->b_cont) == NULL)
2312					break;
2313				MTYPE(mpnext) = M_DATA;
2314				nextq = sppptun_recv(q, &mpnext,
2315				    dlprim->dl_primitive == DL_UNITDATA_IND ?
2316				    mp->b_rptr +
2317				    dlprim->unitdata_ind.dl_src_addr_offset :
2318				    tll->tll_lcladdr.pta_pppoe.ptma_mac);
2319				if (nextq != NULL)
2320					putnext(nextq, mpnext);
2321				freeb(mp);
2322				return;
2323
2324			default:
2325				urput_dlpi(q, mp);
2326				return;
2327			}
2328			break;
2329		}
2330		freemsg(mp);
2331		break;
2332
2333	default:
2334		freemsg(mp);
2335		break;
2336	}
2337}
2338
2339/*
2340 * sppptun_ursrv()
2341 *
2342 * MT-Perimeters:
2343 *    exclusive inner, shared outer.
2344 *
2345 * Description:
2346 *    Upper read-side service procedure.  This procedure services the
2347 *    client streams.  We get here because the client (PPP) asserts
2348 *    flow control down to us.
2349 */
2350static int
2351sppptun_ursrv(queue_t *q)
2352{
2353	mblk_t		*mp;
2354
2355	ASSERT(q->q_ptr != NULL);
2356
2357	while ((mp = getq(q)) != NULL) {
2358		if (canputnext(q)) {
2359			putnext(q, mp);
2360		} else {
2361			(void) putbq(q, mp);
2362			break;
2363		}
2364	}
2365	return (0);
2366}
2367
/*
 * Dummy constructor/destructor functions for kmem_cache_create.
 * The cache set up in sppptun_tcl_init() holds tuncl_t-sized objects
 * that need no per-object construction or teardown, so both
 * callbacks are intentionally no-ops.
 */
2373
/*ARGSUSED*/
static int
tcl_constructor(void *maddr, void *arg, int kmflags)
{
	/* No per-object setup is performed; always report success. */
	return (0);
}
2380
/*ARGSUSED*/
static void
tcl_destructor(void *maddr, void *arg)
{
	/* No per-object teardown is performed. */
}
2386
/*
 * Memory charged to a single tunnel client: the tuncl_t structure
 * itself, its pointer entry in the tcl_slots array, and the two
 * messages preallocated for close (counted as one dblk_t each).
 */
#define	TUNCL_SIZE	(sizeof (tuncl_t *) + sizeof (tuncl_t) + \
			(sizeof (dblk_t) * 2))
2394
/*
 * Clear all bits of x except the highest bit.  Values of 2 or less
 * are returned unchanged.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.  The shift is done on an unsigned constant so that a
 * result occupying the top bit of a 32-bit value does not overflow a
 * signed int.
 */
#define	truncate(x) 	((x) <= 2 ? (x) : (1U << (highbit(x) - 1)))
2399
2400/*
2401 * This function initializes some well-known global variables inside
2402 * the module.
2403 *
2404 * Called by sppptun_mod.c:_init() before installing the module.
2405 */
2406void
2407sppptun_init(void)
2408{
2409	tunll_list.q_forw = tunll_list.q_back = &tunll_list;
2410}
2411
2412/*
2413 * This function allocates the initial internal storage for the
2414 * sppptun driver.
2415 *
2416 * Called by sppptun_mod.c:_init() after installing module.
2417 */
2418void
2419sppptun_tcl_init(void)
2420{
2421	uint_t i, j;
2422
2423	rw_init(&tcl_rwlock, NULL, RW_DRIVER, NULL);
2424	rw_enter(&tcl_rwlock, RW_WRITER);
2425	tcl_nslots = sppptun_init_cnt;
2426	tcl_slots = kmem_zalloc(tcl_nslots * sizeof (tuncl_t *), KM_SLEEP);
2427
2428	tcl_cache = kmem_cache_create("sppptun_map", sizeof (tuncl_t), 0,
2429	    tcl_constructor, tcl_destructor, NULL, NULL, NULL, 0);
2430
2431	/* Allocate integer space for minor numbers */
2432	tcl_minor_arena = vmem_create("sppptun_minor", (void *)1, tcl_nslots,
2433	    1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2434
2435	/*
2436	 * Calculate available number of tunnels - how many tunnels
2437	 * can we allocate in sppptun_pctofmem % of available
2438	 * memory.  The value is rounded up to the nearest power of 2.
2439	 */
2440	i = (sppptun_pctofmem * kmem_maxavail()) / (100 * TUNCL_SIZE);
2441	j = truncate(i);	/* i with non-high bits stripped */
2442	if (i != j)
2443		j *= 2;
2444	tcl_minormax = j;
2445	rw_exit(&tcl_rwlock);
2446}
2447
2448/*
2449 * This function checks that there are no plumbed streams or other users.
2450 *
2451 * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2452 * both perimeters.
2453 */
2454int
2455sppptun_tcl_fintest(void)
2456{
2457	if (tunll_list.q_forw != &tunll_list || tcl_inuse > 0)
2458		return (EBUSY);
2459	else
2460		return (0);
2461}
2462
2463/*
2464 * If no lower streams are plumbed, then this function deallocates all
2465 * internal storage in preparation for unload.
2466 *
2467 * Called by sppptun_mod.c:_fini().  Assumes that we're exclusive on
2468 * both perimeters.
2469 */
2470void
2471sppptun_tcl_fini(void)
2472{
2473	if (tcl_minor_arena != NULL) {
2474		vmem_destroy(tcl_minor_arena);
2475		tcl_minor_arena = NULL;
2476	}
2477	if (tcl_cache != NULL) {
2478		kmem_cache_destroy(tcl_cache);
2479		tcl_cache = NULL;
2480	}
2481	kmem_free(tcl_slots, tcl_nslots * sizeof (tuncl_t *));
2482	tcl_slots = NULL;
2483	rw_destroy(&tcl_rwlock);
2484	ASSERT(tcl_slots == NULL);
2485	ASSERT(tcl_cache == NULL);
2486	ASSERT(tcl_minor_arena == NULL);
2487}
2488