optcom.c revision 5240:e7599510dd03
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/* Copyright (c) 1990 Mentat Inc. */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * This file contains common code for handling Options Management requests.
31 */
32
33#include <sys/types.h>
34#include <sys/stream.h>
35#include <sys/stropts.h>
36#include <sys/strsubr.h>
37#include <sys/errno.h>
38#define	_SUN_TPI_VERSION 2
39#include <sys/tihdr.h>
40#include <sys/socket.h>
41#include <sys/ddi.h>
42#include <sys/debug.h>		/* for ASSERT */
43#include <sys/policy.h>
44
45#include <inet/common.h>
46#include <inet/mi.h>
47#include <inet/nd.h>
48#include <netinet/ip6.h>
49#include <inet/ip.h>
50#include <inet/mib2.h>
51#include <netinet/in.h>
52#include "optcom.h"
53
54#include <inet/optcom.h>
55
56/*
57 * Function prototypes
58 */
59static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *,
60    boolean_t *, size_t *);
61static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp,
62    mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp,
63    mblk_t *first_mp, boolean_t is_restart, boolean_t *queued_statusp);
64static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t);
65static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **,
66    t_uscalar_t *, cred_t *, optdb_obj_t *);
67static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **,
68    t_uscalar_t *, cred_t *cr, optdb_obj_t *);
69static int do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
70    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
71    cred_t *, optdb_obj_t *dbobjp, mblk_t *first_mp);
72static opdes_t *opt_chk_lookup(t_uscalar_t, t_uscalar_t, opdes_t *, uint_t);
73static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t);
74static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t);
75static boolean_t opt_length_ok(opdes_t *, struct T_opthdr *);
76static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t);
77static boolean_t opt_bloated_maxsize(opdes_t *);
78
79/* Common code for sending back a T_ERROR_ACK. */
80void
81optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
82{
83	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
84		qreply(q, mp);
85}
86
87/*
88 * The option management routines svr4_optcom_req() and tpi_optcom_req() use
 * callback functions as arguments. The interfaces expected of these
 * callback functions are described below.
91 *
92 *
93 * (1) deffn(q, optlevel, optname, optvalp)
94 *
95 *	- Function only called when default value comes from protocol
96 *	 specific code and not the option database table (indicated by
97 *	  OP_DEF_FN property in option database.)
98 *	- Error return is -1. Valid returns are >=0.
99 *	- When valid, the return value represents the length used for storing
100 *		the default value of the option.
101 *      - Error return implies the called routine did not recognize this
102 *              option. Something downstream could so input is left unchanged
103 *              in request buffer.
104 *
105 * (2) getfn(q, optlevel, optname, optvalp)
106 *
107 *	- Error return is -1. Valid returns are >=0.
108 *	- When valid, the return value represents the length used for storing
109 *		the actual value of the option.
110 *      - Error return implies the called routine did not recognize this
111 *              option. Something downstream could so input is left unchanged
112 *              in request buffer.
113 *
114 * (3) setfn(q, optset_context, optlevel, optname, inlen, invalp,
115 *	outlenp, outvalp, attrp, cr);
116 *
117 *	- OK return is 0, Error code is returned as a non-zero argument.
118 *      - If negative it is ignored by svr4_optcom_req(). If positive, error
119 *        is returned. A negative return implies that option, while handled on
120 *	  this stack is not handled at this level and will be handled further
121 *	  downstream.
 *	- Both negative and positive errors are treated as errors in an
 *	  identical manner by tpi_optcom_req(). The errors affect the
 *	  "status" field of each option's T_opthdr. If successful, an
 *	  appropriate success result is carried. If an error occurs, the
 *	  status is set to "failure" at the topmost level and left unchanged
 *	  at other levels. (This "failure" can turn into a success at
 *	  another level.)
128 *	- optset_context passed for tpi_optcom_req(). It is interpreted as:
129 *        - SETFN_OPTCOM_CHECKONLY
130 *		semantics are to pretend to set the value and report
131 *		back if it would be successful.
132 *		This is used with T_CHECK semantics in XTI
133 *        - SETFN_OPTCOM_NEGOTIATE
134 *		set the value. Call from option management primitive
135 *		T_OPTMGMT_REQ when T_NEGOTIATE flags is used.
136 *	  - SETFN_UD_NEGOTIATE
 *		option request came riding on a UNITDATA primitive; most
 *		often it has "this datagram" semantics to influence properties
 *		affecting an outgoing datagram or associated with a received
 *		datagram
141 *		[ Note: XTI permits this use outside of "this datagram"
142 *		semantics also and permits setting "management related"
143 *		options in this	context and its test suite enforces it ]
 *	  - SETFN_CONN_NEGOTIATE
 *		option request came riding on a CONN_REQ/RES primitive and
 *		most often has "this connection" (negotiation during
 *		"connection establishment") semantics.
148 *		[ Note: XTI permits use of these outside of "this connection"
149 *		semantics and permits "management related" options in this
150 *		context and its test suite enforces it. ]
151 *
152 *	- inlen, invalp is the option length,value requested to be set.
153 *	- outlenp, outvalp represent return parameters which contain the
154 *	  value set and it might be different from one passed on input.
155 *	- attrp points to a data structure that's used by v6 modules to
156 *	  store ancillary data options or sticky options.
157 *	- cr points to the caller's credentials
158 *	- the caller might pass same buffers for input and output and the
159 *	  routine should protect against this case by not updating output
160 *	  buffers until it is done referencing input buffers and any other
161 *	  issues (e.g. not use bcopy() if we do not trust what it does).
 *      - If option is not known, it returns error. We arbitrarily pick EINVAL.
 *        It can however get called with options that are handled downstream
 *        or upstream, so svr4_optcom_req() does not treat negative return
 *        values as errors.
166 *
167 */
168
169/*
170 * Upper Level Protocols call this routine when they receive
171 * a T_SVR4_OPTMGMT_REQ message.  They supply callback functions
172 * for setting a new value for a single options, getting the
173 * current value for a single option, and checking for support
174 * of a single option.  svr4_optcom_req validates the option management
175 * buffer passed in, and calls the appropriate routines to do the
176 * job requested.
177 * XXX Code below needs some restructuring after we have some more
178 * macros to support 'struct opthdr' in the headers.
179 *
180 * IP-MT notes: The option management framework functions svr4_optcom_req() and
181 * tpi_optcom_req() allocate and prepend an M_CTL mblk to the actual
182 * T_optmgmt_req mblk and pass the chain as an additional parameter to the
183 * protocol set functions. If a protocol set function (such as ip_opt_set)
184 * cannot process the option immediately it can return EINPROGRESS. ip_opt_set
185 * enqueues the message in the appropriate sq and returns EINPROGRESS. Later
186 * the sq framework arranges to restart this operation and passes control to
187 * the restart function ip_restart_optmgmt() which in turn calls
188 * svr4_optcom_req() or tpi_optcom_req() to restart the option processing.
189 */
190int
191svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
192    boolean_t pass_to_ip)
193{
194	pfi_t	deffn = dbobjp->odb_deffn;
195	pfi_t	getfn = dbobjp->odb_getfn;
196	opt_set_fn setfn = dbobjp->odb_setfn;
197	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
198	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
199	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
200	opt_restart_t *or;
201	struct opthdr *restart_opt;
202	boolean_t is_restart = B_FALSE;
203	mblk_t	*first_mp;
204
205	t_uscalar_t max_optbuf_len;
206	int len;
207	mblk_t	*mp1 = NULL;
208	struct opthdr *next_opt;
209	struct opthdr *opt;
210	struct opthdr *opt1;
211	struct opthdr *opt_end;
212	struct opthdr *opt_start;
213	opdes_t	*optd;
214	boolean_t	pass_to_next = B_FALSE;
215	struct T_optmgmt_ack *toa;
216	struct T_optmgmt_req *tor;
217
218	/*
219	 * Allocate M_CTL and prepend to the packet for restarting this
220	 * option if needed. IP may need to queue and restart the option
221	 * if it cannot obtain exclusive conditions immediately. Please see
222	 * IP-MT notes before the start of svr4_optcom_req
223	 */
224	if (mp->b_datap->db_type == M_CTL) {
225		is_restart = B_TRUE;
226		first_mp = mp;
227		mp = mp->b_cont;
228		ASSERT(mp->b_wptr - mp->b_rptr >=
229		    sizeof (struct T_optmgmt_req));
230		tor = (struct T_optmgmt_req *)mp->b_rptr;
231		ASSERT(tor->MGMT_flags == T_NEGOTIATE);
232
233		or = (opt_restart_t *)first_mp->b_rptr;
234		opt_start = or->or_start;
235		opt_end = or->or_end;
236		restart_opt = or->or_ropt;
237		goto restart;
238	}
239
240	tor = (struct T_optmgmt_req *)mp->b_rptr;
241	/* Verify message integrity. */
242	if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req))
243		goto bad_opt;
244	/* Verify MGMT_flags legal */
245	switch (tor->MGMT_flags) {
246	case T_DEFAULT:
247	case T_NEGOTIATE:
248	case T_CURRENT:
249	case T_CHECK:
250		/* OK - legal request flags */
251		break;
252	default:
253		optcom_err_ack(q, mp, TBADFLAG, 0);
254		return (0);
255	}
256	if (tor->MGMT_flags == T_DEFAULT) {
257		/* Is it a request for default option settings? */
258
259		/*
260		 * Note: XXX TLI and TPI specification was unclear about
261		 * semantics of T_DEFAULT and the following historical note
262		 * and its interpretation is incorrect (it implies a request
263		 * for default values of only the identified options not all.
264		 * The semantics have been explained better in XTI spec.)
265		 * However, we do not modify (comment or code) here to keep
266		 * compatibility.
267		 * We can rethink this if it ever becomes an issue.
268		 * ----historical comment start------
269		 * As we understand it, the input buffer is meaningless
270		 * so we ditch the message.  A T_DEFAULT request is a
271		 * request to obtain a buffer containing defaults for
272		 * all supported options, so we allocate a maximum length
273		 * reply.
274		 * ----historical comment end -------
275		 */
276		/* T_DEFAULT not passed down */
277		ASSERT(topmost_tpiprovider == B_TRUE);
278		freemsg(mp);
279		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
280		    opt_arr_cnt);
281		mp = allocb(max_optbuf_len, BPRI_MED);
282		if (!mp) {
283no_mem:;
284			optcom_err_ack(q, mp, TSYSERR, ENOMEM);
285			return (0);
286		}
287
288		/* Initialize the T_optmgmt_ack header. */
289		toa = (struct T_optmgmt_ack *)mp->b_rptr;
290		bzero((char *)toa, max_optbuf_len);
291		toa->PRIM_type = T_OPTMGMT_ACK;
292		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
293		/* TODO: Is T_DEFAULT the right thing to put in MGMT_flags? */
294		toa->MGMT_flags = T_DEFAULT;
295
296		/* Now walk the table of options passed in */
297		opt = (struct opthdr *)&toa[1];
298		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
299			/*
300			 * All the options in the table of options passed
301			 * in are by definition supported by the protocol
302			 * calling this function.
303			 */
304			if (!OA_READ_PERMISSION(optd, cr))
305				continue;
306			opt->level = optd->opdes_level;
307			opt->name = optd->opdes_name;
308			if (!(optd->opdes_props & OP_DEF_FN) ||
309			    ((len = (*deffn)(q, opt->level,
310			    opt->name, (uchar_t *)&opt[1])) < 0)) {
311				/*
312				 * Fill length and value from table.
313				 *
314				 * Default value not instantiated from function
315				 * (or the protocol specific function failed it;
316				 * In this interpretation of T_DEFAULT, this is
317				 * the best we can do)
318				 */
319				switch (optd->opdes_size) {
320				/*
321				 * Since options are guaranteed aligned only
322				 * on a 4 byte boundary (t_scalar_t) any
323				 * option that is greater in size will default
324				 * to the bcopy below
325				 */
326				case sizeof (int32_t):
327					*(int32_t *)&opt[1] =
328					    (int32_t)optd->opdes_default;
329					break;
330				case sizeof (int16_t):
331					*(int16_t *)&opt[1] =
332					    (int16_t)optd->opdes_default;
333					break;
334				case sizeof (int8_t):
335					*(int8_t *)&opt[1] =
336					    (int8_t)optd->opdes_default;
337					break;
338				default:
339					/*
340					 * other length but still assume
341					 * fixed - use bcopy
342					 */
343					bcopy(optd->opdes_defbuf,
344					    &opt[1], optd->opdes_size);
345					break;
346				}
347				opt->len = optd->opdes_size;
348			}
349			else
350				opt->len = (t_uscalar_t)len;
351			opt = (struct opthdr *)((char *)&opt[1] +
352			    _TPI_ALIGN_OPT(opt->len));
353		}
354
355		/* Now record the final length. */
356		toa->OPT_length = (t_scalar_t)((char *)opt - (char *)&toa[1]);
357		mp->b_wptr = (uchar_t *)opt;
358		mp->b_datap->db_type = M_PCPROTO;
359		/* Ship it back. */
360		qreply(q, mp);
361		return (0);
362	}
363	/* T_DEFAULT processing complete - no more T_DEFAULT */
364
365	/*
366	 * For T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make a
367	 * pass through the input buffer validating the details and
368	 * making sure each option is supported by the protocol.
369	 */
370	if ((opt_start = (struct opthdr *)mi_offset_param(mp,
371	    tor->OPT_offset, tor->OPT_length)) == NULL)
372		goto bad_opt;
373	if (!__TPI_OPT_ISALIGNED(opt_start))
374		goto bad_opt;
375
376	opt_end = (struct opthdr *)((uchar_t *)opt_start +
377	    tor->OPT_length);
378
379	for (opt = opt_start; opt < opt_end; opt = next_opt) {
380		/*
381		 * Verify we have room to reference the option header
382		 * fields in the option buffer.
383		 */
384		if ((uchar_t *)opt + sizeof (struct opthdr) >
385		    (uchar_t *)opt_end)
386			goto bad_opt;
387		/*
388		 * We now compute pointer to next option in buffer 'next_opt'
389		 * The next_opt computation above below 'opt->len' initialized
390		 * by application which cannot be trusted. The usual value
391		 * too large will be captured by the loop termination condition
392		 * above. We check for the following which it will miss.
393		 * 	-pointer space wraparound arithmetic overflow
394		 *	-last option in buffer with 'opt->len' being too large
395		 *	 (only reason 'next_opt' should equal or exceed
396		 *	 'opt_end' for last option is roundup unless length is
397		 *	 too-large/invalid)
398		 */
399		next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
400		    _TPI_ALIGN_OPT(opt->len));
401
402		if ((uchar_t *)next_opt < (uchar_t *)&opt[1] ||
403		    ((next_opt >= opt_end) &&
404		    (((uchar_t *)next_opt - (uchar_t *)opt_end) >=
405		    __TPI_ALIGN_SIZE)))
406			goto bad_opt;
407
408		/* sanity check */
409		if (opt->name == T_ALLOPT)
410			goto bad_opt;
411
412		/* Find the option in the opt_arr. */
413		if ((optd = opt_chk_lookup(opt->level, opt->name,
414		    opt_arr, opt_arr_cnt)) == NULL) {
415			/*
416			 * Not found, that is a bad thing if
417			 * the caller is a tpi provider
418			 */
419			if (topmost_tpiprovider)
420				goto bad_opt;
421			else
422				continue; /* skip unmodified */
423		}
424
425		/* Additional checks dependent on operation. */
426		switch (tor->MGMT_flags) {
427		case T_NEGOTIATE:
428			if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
429				/* can't negotiate option */
430				if (!(OA_MATCHED_PRIV(optd, cr)) &&
431				    OA_WX_ANYPRIV(optd)) {
432					/*
433					 * not privileged but privilege
434					 * will help negotiate option.
435					 */
436					optcom_err_ack(q, mp, TACCES, 0);
437					return (0);
438				} else
439					goto bad_opt;
440			}
441			/*
442			 * Verify size for options
443			 * Note: For retaining compatibility with historical
444			 * behavior, variable lengths options will have their
445			 * length verified in the setfn() processing.
446			 * In order to be compatible with SunOS 4.X we return
447			 * EINVAL errors for bad lengths.
448			 */
449			if (!(optd->opdes_props & OP_VARLEN)) {
450				/* fixed length - size must match */
451				if (opt->len != optd->opdes_size) {
452					optcom_err_ack(q, mp, TSYSERR, EINVAL);
453					return (0);
454				}
455			}
456			break;
457
458		case T_CHECK:
459			if (!OA_RWX_ANYPRIV(optd))
460				/* any of "rwx" permission but not not none */
461				goto bad_opt;
462			/*
463			 * XXX Since T_CURRENT was not there in TLI and the
464			 * official TLI inspired TPI standard, getsockopt()
465			 * API uses T_CHECK (for T_CURRENT semantics)
466			 * The following fallthru makes sense because of its
467			 * historical use as semantic equivalent to T_CURRENT.
468			 */
469			/* FALLTHRU */
470		case T_CURRENT:
471			if (!OA_READ_PERMISSION(optd, cr)) {
472				/* can't read option value */
473				if (!(OA_MATCHED_PRIV(optd, cr)) &&
474				    OA_R_ANYPRIV(optd)) {
475					/*
476					 * not privileged but privilege
477					 * will help in reading option value.
478					 */
479					optcom_err_ack(q, mp, TACCES, 0);
480					return (0);
481				} else
482					goto bad_opt;
483			}
484			break;
485
486		default:
487			optcom_err_ack(q, mp, TBADFLAG, 0);
488			return (0);
489		}
490		/* We liked it.  Keep going. */
491	} /* end for loop scanning option buffer */
492
493	/* Now complete the operation as required. */
494	switch (tor->MGMT_flags) {
495	case T_CHECK:
496		/*
497		 * Historically used same as T_CURRENT (which was added to
498		 * standard later). Code retained for compatibility.
499		 */
500		/* FALLTHROUGH */
501	case T_CURRENT:
502		/*
503		 * Allocate a maximum size reply.  Perhaps we are supposed to
504		 * assume that the input buffer includes space for the answers
505		 * as well as the opthdrs, but we don't know that for sure.
506		 * So, instead, we create a new output buffer, using the
507		 * input buffer only as a list of options.
508		 */
509		max_optbuf_len = optcom_max_optbuf_len(opt_arr,
510		    opt_arr_cnt);
511		mp1 = allocb_cred(max_optbuf_len, cr);
512		if (!mp1)
513			goto no_mem;
514		/* Initialize the header. */
515		mp1->b_datap->db_type = M_PCPROTO;
516		mp1->b_wptr = &mp1->b_rptr[sizeof (struct T_optmgmt_ack)];
517		toa = (struct T_optmgmt_ack *)mp1->b_rptr;
518		toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
519		toa->MGMT_flags = tor->MGMT_flags;
520		/*
521		 * Walk through the input buffer again, this time adding
522		 * entries to the output buffer for each option requested.
523		 * Note, sanity of option header, last option etc, verified
524		 * in first pass.
525		 */
526		opt1 = (struct opthdr *)&toa[1];
527
528		for (opt = opt_start; opt < opt_end; opt = next_opt) {
529
530			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
531			    _TPI_ALIGN_OPT(opt->len));
532
533			opt1->name = opt->name;
534			opt1->level = opt->level;
535			len = (*getfn)(q, opt->level,
536			    opt->name, (uchar_t *)&opt1[1]);
537			/*
538			 * Failure means option is not recognized. Copy input
539			 * buffer as is
540			 */
541			if (len < 0) {
542				opt1->len = opt->len;
543				bcopy(&opt[1], &opt1[1], opt->len);
544			} else {
545				opt1->len = (t_uscalar_t)len;
546			}
547			opt1 = (struct opthdr *)((uchar_t *)&opt1[1] +
548			    _TPI_ALIGN_OPT(opt1->len));
549		} /* end for loop */
550
551		/* Record the final length. */
552		toa->OPT_length = (t_scalar_t)((uchar_t *)opt1 -
553		    (uchar_t *)&toa[1]);
554		mp1->b_wptr = (uchar_t *)opt1;
555		/* Ditch the input buffer. */
556		freemsg(mp);
557		mp = mp1;
558		/* Always let the next module look at the option. */
559		pass_to_next = B_TRUE;
560		break;
561
562	case T_NEGOTIATE:
563		first_mp = allocb(sizeof (opt_restart_t), BPRI_LO);
564		if (first_mp == NULL) {
565			optcom_err_ack(q, mp, TSYSERR, ENOMEM);
566			return (0);
567		}
568		first_mp->b_datap->db_type = M_CTL;
569		or = (opt_restart_t *)first_mp->b_rptr;
570		or->or_start = opt_start;
571		or->or_end =  opt_end;
572		or->or_type = T_SVR4_OPTMGMT_REQ;
573		or->or_private = 0;
574		first_mp->b_cont = mp;
575restart:
576		/*
577		 * Here we are expecting that the response buffer is exactly
578		 * the same size as the input buffer.  We pass each opthdr
579		 * to the protocol's set function.  If the protocol doesn't
580		 * like it, it can update the value in it return argument.
581		 */
582		/*
583		 * Pass each negotiated option through the protocol set
584		 * function.
585		 * Note: sanity check on option header values done in first
586		 * pass and not repeated here.
587		 */
588		toa = (struct T_optmgmt_ack *)tor;
589
590		for (opt = is_restart ? restart_opt: opt_start; opt < opt_end;
591		    opt = next_opt) {
592			int error;
593
594			/*
595			 * Point to the current option in or, in case this
596			 * option has to be restarted later on
597			 */
598			or->or_ropt = opt;
599			next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
600			    _TPI_ALIGN_OPT(opt->len));
601
602			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
603			    opt->level, opt->name,
604			    opt->len, (uchar_t *)&opt[1],
605			    &opt->len, (uchar_t *)&opt[1], NULL, cr, first_mp);
606			/*
607			 * Treat positive "errors" as real.
608			 * Note: negative errors are to be treated as
609			 * non-fatal by svr4_optcom_req() and are
610			 * returned by setfn() when it is passed an
611			 * option it does not handle. Since the option
612			 * passed opt_chk_lookup(), it is implied that
613			 * it is valid but was either handled upstream
614			 * or will be handled downstream.
615			 */
616			if (error == EINPROGRESS) {
617				/*
618				 * The message is queued and will be
619				 * reprocessed later. Typically ip queued
620				 * the message to get some exclusive conditions
621				 * and later on calls this func again.
622				 */
623				return (EINPROGRESS);
624			} else if (error > 0) {
625				optcom_err_ack(q, mp, TSYSERR, error);
626				freeb(first_mp);
627				return (0);
628			}
629			/*
630			 * error < 0 means option is not recognized.
631			 * But with OP_PASSNEXT the next module
632			 * might recognize it.
633			 */
634		}
635		/* Done with the restart control mp. */
636		freeb(first_mp);
637		pass_to_next = B_TRUE;
638		break;
639	default:
640		optcom_err_ack(q, mp, TBADFLAG, 0);
641		return (0);
642	}
643
644	if (pass_to_next && (q->q_next != NULL || pass_to_ip)) {
645		/* Send it down to the next module and let it reply */
646		toa->PRIM_type = T_SVR4_OPTMGMT_REQ; /* Changed by IP to ACK */
647		if (q->q_next != NULL)
648			putnext(q, mp);
649		else
650			ip_output(Q_TO_CONN(q), mp, q, IP_WPUT);
651	} else {
652		/* Set common fields in the header. */
653		toa->MGMT_flags = T_SUCCESS;
654		mp->b_datap->db_type = M_PCPROTO;
655		toa->PRIM_type = T_OPTMGMT_ACK;
656		qreply(q, mp);
657	}
658	return (0);
659bad_opt:;
660	optcom_err_ack(q, mp, TBADOPT, 0);
661	return (0);
662}
663
664/*
665 * New optcom_req inspired by TPI/XTI semantics
666 */
667int
668tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
669    boolean_t pass_to_ip)
670{
671	t_scalar_t t_error;
672	mblk_t *toa_mp;
673	boolean_t pass_to_next;
674	size_t toa_len;
675	struct T_optmgmt_ack *toa;
676	struct T_optmgmt_req *tor =
677	    (struct T_optmgmt_req *)mp->b_rptr;
678
679	opt_restart_t *or;
680	boolean_t is_restart = B_FALSE;
681	mblk_t	*first_mp = NULL;
682	t_uscalar_t worst_status;
683	boolean_t queued_status;
684
685	/*
686	 * Allocate M_CTL and prepend to the packet for restarting this
687	 * option if needed. IP may need to queue and restart the option
688	 * if it cannot obtain exclusive conditions immediately. Please see
689	 * IP-MT notes before the start of svr4_optcom_req
690	 */
691	if (mp->b_datap->db_type == M_CTL) {
692		is_restart = B_TRUE;
693		first_mp = mp;
694		toa_mp = mp->b_cont;
695		mp = toa_mp->b_cont;
696		ASSERT(mp->b_wptr - mp->b_rptr >=
697		    sizeof (struct T_optmgmt_req));
698		tor = (struct T_optmgmt_req *)mp->b_rptr;
699		ASSERT(tor->MGMT_flags == T_NEGOTIATE);
700
701		or = (opt_restart_t *)first_mp->b_rptr;
702		goto restart;
703	}
704
705	/* Verify message integrity. */
706	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) {
707		optcom_err_ack(q, mp, TBADOPT, 0);
708		return (0);
709	}
710
711	/* Verify MGMT_flags legal */
712	switch (tor->MGMT_flags) {
713	case T_DEFAULT:
714	case T_NEGOTIATE:
715	case T_CURRENT:
716	case T_CHECK:
717		/* OK - legal request flags */
718		break;
719	default:
720		optcom_err_ack(q, mp, TBADFLAG, 0);
721		return (0);
722	}
723
724	/*
725	 * In this design, there are two passes required on the input buffer
726	 * mostly to accomodate variable length options and "T_ALLOPT" option
727	 * which has the semantics "all options of the specified level".
728	 *
729	 * For T_DEFAULT, T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make
730	 * a pass through the input buffer validating the details and making
731	 * sure each option is supported by the protocol. We also determine the
732	 * length of the option buffer to return. (Variable length options and
733	 * T_ALLOPT mean that length can be different for output buffer).
734	 */
735
736	pass_to_next = B_FALSE;	/* initial value */
737	toa_len = 0;		/* initial value */
738
739	/*
740	 * First pass, we do the following
741	 *	- estimate cumulative length needed for results
742	 *	- set "status" field based on permissions, option header check
743	 *	  etc.
744	 *	- determine "pass_to_next" whether we need to send request to
745	 *	  downstream module/driver.
746	 */
747	if ((t_error = process_topthdrs_first_pass(mp, cr, dbobjp,
748	    &pass_to_next, &toa_len)) != 0) {
749		optcom_err_ack(q, mp, t_error, 0);
750		return (0);
751	}
752
753	/*
754	 * A validation phase of the input buffer is done. We have also
755	 * obtained the length requirement and and other details about the
756	 * input and we liked input buffer so far.  We make another scan
757	 * through the input now and generate the output necessary to complete
758	 * the operation.
759	 */
760
761	toa_mp = allocb_cred(toa_len, cr);
762	if (!toa_mp) {
763		optcom_err_ack(q, mp, TSYSERR, ENOMEM);
764		return (0);
765	}
766
767	first_mp = allocb(sizeof (opt_restart_t), BPRI_LO);
768	if (first_mp == NULL) {
769		freeb(toa_mp);
770		optcom_err_ack(q, mp, TSYSERR, ENOMEM);
771		return (0);
772	}
773	first_mp->b_datap->db_type = M_CTL;
774	or = (opt_restart_t *)first_mp->b_rptr;
775	/*
776	 * Set initial values for generating output.
777	 */
778	or->or_worst_status = T_SUCCESS;
779	or->or_type = T_OPTMGMT_REQ;
780	or->or_private = 0;
781	/* remaining fields fileed in do_options_second_pass */
782
783restart:
784	/*
785	 * This routine makes another pass through the option buffer this
786	 * time acting on the request based on "status" result in the
787	 * first pass. It also performs "expansion" of T_ALLOPT into
788	 * all options of a certain level and acts on each for this request.
789	 */
790	if ((t_error = do_options_second_pass(q, mp, toa_mp, cr, dbobjp,
791	    first_mp, is_restart, &queued_status)) != 0) {
792		freemsg(toa_mp);
793		optcom_err_ack(q, mp, t_error, 0);
794		return (0);
795	}
796	if (queued_status) {
797		/* Option will be restarted */
798		return (EINPROGRESS);
799	}
800	worst_status = or->or_worst_status;
801	/* Done with the first mp */
802	freeb(first_mp);
803	toa_mp->b_cont = NULL;
804
805	/*
806	 * Following code relies on the coincidence that T_optmgmt_req
807	 * and T_optmgmt_ack are identical in binary representation
808	 */
809	toa = (struct T_optmgmt_ack *)toa_mp->b_rptr;
810	toa->OPT_length = (t_scalar_t)(toa_mp->b_wptr - (toa_mp->b_rptr +
811	    sizeof (struct T_optmgmt_ack)));
812	toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
813
814	toa->MGMT_flags = tor->MGMT_flags;
815
816
817	freemsg(mp);		/* free input mblk */
818
819	/*
820	 * If there is atleast one option that requires a downstream
821	 * forwarding and if it is possible, we forward the message
822	 * downstream. Else we ack it.
823	 */
824	if (pass_to_next && (q->q_next != NULL || pass_to_ip)) {
825		/*
826		 * We pass it down as T_OPTMGMT_REQ. This code relies
827		 * on the happy coincidence that T_optmgmt_req and
828		 * T_optmgmt_ack are identical data structures
829		 * at the binary representation level.
830		 */
831		toa_mp->b_datap->db_type = M_PROTO;
832		toa->PRIM_type = T_OPTMGMT_REQ;
833		if (q->q_next != NULL)
834			putnext(q, toa_mp);
835		else
836			ip_output(Q_TO_CONN(q), toa_mp, q, IP_WPUT);
837	} else {
838		toa->PRIM_type = T_OPTMGMT_ACK;
839		toa_mp->b_datap->db_type = M_PCPROTO;
840		toa->MGMT_flags |= worst_status; /* XXX "worst" or "OR" TPI ? */
841		qreply(q, toa_mp);
842	}
843	return (0);
844}
845
846
847/*
848 * Following routine makes a pass through option buffer in mp and performs the
849 * following tasks.
850 *	- estimate cumulative length needed for results
851 *	- set "status" field based on permissions, option header check
852 *	  etc.
853 *	- determine "pass_to_next" whether we need to send request to
854 *	  downstream module/driver.
855 */
856
857static t_scalar_t
858process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
859    boolean_t *pass_to_nextp, size_t *toa_lenp)
860{
861	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
862	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
863	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
864	optlevel_t *valid_level_arr = dbobjp->odb_valid_levels_arr;
865	uint_t valid_level_arr_cnt = dbobjp->odb_valid_levels_arr_cnt;
866	struct T_opthdr *opt;
867	struct T_opthdr *opt_start, *opt_end;
868	opdes_t	*optd;
869	size_t allopt_len;
870	struct T_optmgmt_req *tor =
871	    (struct T_optmgmt_req *)mp->b_rptr;
872
873	*toa_lenp = sizeof (struct T_optmgmt_ack); /* initial value */
874
875	if ((opt_start = (struct T_opthdr *)
876	    mi_offset_param(mp, tor->OPT_offset, tor->OPT_length)) == NULL) {
877		return (TBADOPT);
878	}
879	if (!__TPI_TOPT_ISALIGNED(opt_start))
880		return (TBADOPT);
881
882	opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);
883
884	for (opt = opt_start; opt && (opt < opt_end);
885	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
886		/*
887		 * Validate the option for length and alignment
888		 * before accessing anything in it.
889		 */
890		if (!(_TPI_TOPT_VALID(opt, opt_start, opt_end)))
891			return (TBADOPT);
892
893		/* Find the option in the opt_arr. */
894		if (opt->name != T_ALLOPT) {
895			optd = opt_chk_lookup(opt->level, opt->name,
896			    opt_arr, opt_arr_cnt);
897			if (optd == NULL) {
898				/*
899				 * Option not found
900				 *
901				 * Verify if level is "valid" or not.
902				 * Note: This check is required by XTI
903				 *
904				 * TPI provider always initializes
905				 * the "not supported" (or whatever) status
906				 * for the options. Other levels leave status
907				 * unchanged if they do not understand an
908				 * option.
909				 */
910				if (topmost_tpiprovider) {
911					if (!opt_level_valid(opt->level,
912					    valid_level_arr,
913					    valid_level_arr_cnt))
914						return (TBADOPT);
915					/*
916					 * level is valid - initialize
917					 * option as not supported
918					 */
919					opt->status = T_NOTSUPPORT;
920				}
921
922				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
923				continue;
924			}
925		} else {
926			/*
927			 * Handle T_ALLOPT case as a special case.
928			 * Note: T_ALLOPT does not mean anything
929			 * for T_CHECK operation.
930			 */
931			allopt_len = 0;
932			if (tor->MGMT_flags == T_CHECK ||
933			    !topmost_tpiprovider ||
934			    ((allopt_len = opt_level_allopts_lengths(opt->level,
935			    opt_arr, opt_arr_cnt)) == 0)) {
936				/*
937				 * This is confusing but correct !
938				 * It is not valid to to use T_ALLOPT with
939				 * T_CHECK flag.
940				 *
941				 * T_ALLOPT is assumed "expanded" at the
942				 * topmost_tpiprovider level so it should not
943				 * be there as an "option name" if this is not
944				 * a topmost_tpiprovider call and we fail it.
945				 *
946				 * opt_level_allopts_lengths() is used to verify
947				 * that "level" associated with the T_ALLOPT is
948				 * supported.
949				 *
950				 */
951				opt->status = T_FAILURE;
952				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
953				continue;
954			}
955			ASSERT(allopt_len != 0); /* remove ? */
956
957			*toa_lenp += allopt_len;
958			opt->status = T_SUCCESS;
959			/* XXX - always set T_ALLOPT 'pass_to_next' for now */
960			*pass_to_nextp = B_TRUE;
961			continue;
962		}
963		/*
964		 * Check if option wants to flow downstream
965		 */
966		if (optd->opdes_props & OP_PASSNEXT)
967			*pass_to_nextp = B_TRUE;
968
969		/* Additional checks dependent on operation. */
970		switch (tor->MGMT_flags) {
971		case T_DEFAULT:
972		case T_CURRENT:
973
974			/*
975			 * The opt_chk_lookup() routine call above approved of
976			 * this option so we can work on the status for it
977			 * based on the permissions for the operation. (This
978			 * can override any status for it set at higher levels)
979			 * We assume this override is OK since chkfn at this
980			 * level approved of this option.
981			 *
982			 * T_CURRENT semantics:
983			 * The read access is required. Else option
984			 * status is T_NOTSUPPORT.
985			 *
986			 * T_DEFAULT semantics:
987			 * Note: specification is not clear on this but we
988			 * interpret T_DEFAULT semantics such that access to
989			 * read value is required for access even the default
990			 * value. Otherwise the option status is T_NOTSUPPORT.
991			 */
992			if (!OA_READ_PERMISSION(optd, cr)) {
993				opt->status = T_NOTSUPPORT;
994				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
995				/* skip to next */
996				continue;
997			}
998
999			/*
1000			 * T_DEFAULT/T_CURRENT semantics:
1001			 * We know that read access is set. If no other access
1002			 * is set, then status is T_READONLY.
1003			 */
1004			if (OA_READONLY_PERMISSION(optd, cr))
1005				opt->status = T_READONLY;
1006			else
1007				opt->status = T_SUCCESS;
1008			/*
1009			 * Option passes all checks. Make room for it in the
1010			 * ack. Note: size stored in table does not include
1011			 * space for option header.
1012			 */
1013			*toa_lenp += sizeof (struct T_opthdr) +
1014			    _TPI_ALIGN_TOPT(optd->opdes_size);
1015			break;
1016
1017		case T_CHECK:
1018		case T_NEGOTIATE:
1019
1020			/*
1021			 * T_NEGOTIATE semantics:
1022			 * If for fixed length option value on input is not the
1023			 * same as value supplied, then status is T_FAILURE.
1024			 *
1025			 * T_CHECK semantics:
1026			 * If value is supplied, semantics same as T_NEGOTIATE.
1027			 * It is however ok not to supply a value with T_CHECK.
1028			 */
1029
1030			if (tor->MGMT_flags == T_NEGOTIATE ||
1031			    (opt->len != sizeof (struct T_opthdr))) {
1032				/*
1033				 * Implies "value" is specified in T_CHECK or
1034				 * it is a T_NEGOTIATE request.
1035				 * Verify size.
1036				 * Note: This can override anything about this
1037				 * option request done at a higher level.
1038				 */
1039				if (!opt_length_ok(optd, opt)) {
1040					/* bad size */
1041					*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
1042					opt->status = T_FAILURE;
1043					continue;
1044				}
1045			}
1046			/*
1047			 * The opt_chk_lookup()  routine above() approved of
1048			 * this option so we can work on the status for it based
1049			 * on the permissions for the operation. (This can
1050			 * override anything set at a higher level).
1051			 *
1052			 * T_CHECK/T_NEGOTIATE semantics:
1053			 * Set status to T_READONLY if read is the only access
1054			 * permitted
1055			 */
1056			if (OA_READONLY_PERMISSION(optd, cr)) {
1057				opt->status = T_READONLY;
1058				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
1059				/* skip to next */
1060				continue;
1061			}
1062
1063			/*
1064			 * T_CHECK/T_NEGOTIATE semantics:
1065			 * If write (or execute) access is not set, then status
1066			 * is T_NOTSUPPORT.
1067			 */
1068			if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
1069				opt->status = T_NOTSUPPORT;
1070				*toa_lenp += _TPI_ALIGN_TOPT(opt->len);
1071				/* skip to next option */
1072				continue;
1073			}
1074			/*
1075			 * Option passes all checks. Make room for it in the
1076			 * ack and set success in status.
1077			 * Note: size stored in table does not include header
1078			 * length.
1079			 */
1080			opt->status = T_SUCCESS;
1081			*toa_lenp += sizeof (struct T_opthdr) +
1082			    _TPI_ALIGN_TOPT(optd->opdes_size);
1083			break;
1084
1085		default:
1086			return (TBADFLAG);
1087		}
1088	} /* for loop scanning input buffer */
1089
1090	return (0);		/* OK return */
1091}
1092
1093/*
1094 * This routine makes another pass through the option buffer this
1095 * time acting on the request based on "status" result in the
1096 * first pass. It also performs "expansion" of T_ALLOPT into
1097 * all options of a certain level and acts on each for this request.
1098 */
1099static t_scalar_t
1100do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr,
1101    optdb_obj_t *dbobjp, mblk_t *first_mp, boolean_t is_restart,
1102    boolean_t *queued_statusp)
1103{
1104	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
1105	int failed_option;
1106	struct T_opthdr *opt;
1107	struct T_opthdr *opt_start, *opt_end, *restart_opt;
1108	uchar_t *optr;
1109	uint_t optset_context;
1110	struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr;
1111	opt_restart_t	*or;
1112	t_uscalar_t	*worst_statusp;
1113	int	err;
1114
1115	*queued_statusp = B_FALSE;
1116	or = (opt_restart_t *)first_mp->b_rptr;
1117	worst_statusp = &or->or_worst_status;
1118
1119	optr = (uchar_t *)ack_mp->b_rptr +
1120	    sizeof (struct T_optmgmt_ack); /* assumed int32_t aligned */
1121
1122	/*
1123	 * Set initial values for scanning input
1124	 */
1125	if (is_restart) {
1126		opt_start = (struct T_opthdr *)or->or_start;
1127		opt_end = (struct T_opthdr *)or->or_end;
1128		restart_opt = (struct T_opthdr *)or->or_ropt;
1129	} else {
1130		opt_start = (struct T_opthdr *)mi_offset_param(reqmp,
1131		    tor->OPT_offset, tor->OPT_length);
1132		if (opt_start == NULL)
1133			return (TBADOPT);
1134		opt_end = (struct T_opthdr *)((uchar_t *)opt_start +
1135		    tor->OPT_length);
1136		or->or_start = (struct opthdr *)opt_start;
1137		or->or_end = (struct opthdr *)opt_end;
1138		/*
1139		 * construct the mp chain, in case the setfn needs to
1140		 * queue this and restart option processing later on.
1141		 */
1142		first_mp->b_cont = ack_mp;
1143		ack_mp->b_cont = reqmp;
1144	}
1145	ASSERT(__TPI_TOPT_ISALIGNED(opt_start)); /* verified in first pass */
1146
1147	for (opt = is_restart ? restart_opt : opt_start;
1148	    opt && (opt < opt_end);
1149	    opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
1150		or->or_ropt = (struct opthdr *)opt;
1151		/* verified in first pass */
1152		ASSERT(_TPI_TOPT_VALID(opt, opt_start, opt_end));
1153
1154		/*
1155		 * If the first pass in process_topthdrs_first_pass()
1156		 * has marked the option as a failure case for the MGMT_flags
1157		 * semantics then there is not much to do.
1158		 *
1159		 * Note: For all practical purposes, T_READONLY status is
1160		 * a "success" for T_DEFAULT/T_CURRENT and "failure" for
1161		 * T_CHECK/T_NEGOTIATE
1162		 */
1163		failed_option =
1164		    (opt->status == T_NOTSUPPORT) ||
1165		    (opt->status == T_FAILURE) ||
1166		    ((tor->MGMT_flags & (T_NEGOTIATE|T_CHECK)) &&
1167		    (opt->status == T_READONLY));
1168
1169		if (failed_option) {
1170			/*
1171			 * According to T_DEFAULT/T_CURRENT semantics, the
1172			 * input values, even if present, are to be ignored.
1173			 * Note: Specification is not clear on this, but we
1174			 * interpret that even though we ignore the values, we
1175			 * can return them as is. So we process them similar to
1176			 * T_CHECK/T_NEGOTIATE case which has the semantics to
1177			 * return the values as is. XXX If interpretation is
1178			 * ever determined incorrect fill in appropriate code
1179			 * here to treat T_DEFAULT/T_CURRENT differently.
1180			 *
1181			 * According to T_CHECK/T_NEGOTIATE semantics,
1182			 * in the case of T_NOTSUPPORT/T_FAILURE/T_READONLY,
1183			 * the semantics are to return the "value" part of
1184			 * option untouched. So here we copy the option
1185			 * head including value part if any to output.
1186			 */
1187
1188			bcopy(opt, optr, opt->len);
1189			optr += _TPI_ALIGN_TOPT(opt->len);
1190
1191			*worst_statusp = get_worst_status(opt->status,
1192			    *worst_statusp);
1193
1194			/* skip to process next option in buffer */
1195			continue;
1196
1197		} /* end if "failed option" */
1198		/*
1199		 * The status is T_SUCCESS or T_READONLY
1200		 * We process the value part here
1201		 */
1202		ASSERT(opt->status == T_SUCCESS || opt->status == T_READONLY);
1203		switch (tor->MGMT_flags) {
1204		case T_DEFAULT:
1205			/*
1206			 * We fill default value from table or protocol specific
1207			 * function. If this call fails, we pass input through.
1208			 */
1209			if (do_opt_default(q, opt, &optr, worst_statusp,
1210			    cr, dbobjp) < 0) {
1211				/* fail or pass transparently */
1212				if (topmost_tpiprovider)
1213					opt->status = T_FAILURE;
1214				bcopy(opt, optr, opt->len);
1215				optr += _TPI_ALIGN_TOPT(opt->len);
1216				*worst_statusp = get_worst_status(opt->status,
1217				    *worst_statusp);
1218			}
1219			break;
1220
1221		case T_CURRENT:
1222
1223			do_opt_current(q, opt, &optr, worst_statusp, cr,
1224			    dbobjp);
1225			break;
1226
1227		case T_CHECK:
1228		case T_NEGOTIATE:
1229			if (tor->MGMT_flags == T_CHECK)
1230				optset_context = SETFN_OPTCOM_CHECKONLY;
1231			else	/* T_NEGOTIATE */
1232				optset_context = SETFN_OPTCOM_NEGOTIATE;
1233			err = do_opt_check_or_negotiate(q, opt, optset_context,
1234			    &optr, worst_statusp, cr, dbobjp, first_mp);
1235			if (err == EINPROGRESS) {
1236				*queued_statusp = B_TRUE;
1237				return (0);
1238			}
1239			break;
1240		default:
1241			return (TBADFLAG);
1242		}
1243	} /* end for loop scanning option buffer */
1244
1245	ack_mp->b_wptr = optr;
1246	ASSERT(ack_mp->b_wptr <= ack_mp->b_datap->db_lim);
1247
1248	return (0);		/* OK return */
1249}
1250
1251
1252static t_uscalar_t
1253get_worst_status(t_uscalar_t status, t_uscalar_t current_worst_status)
1254{
1255	/*
1256	 * Return the "worst" among the arguments "status" and
1257	 * "current_worst_status".
1258	 *
1259	 * Note: Tracking "worst_status" can be made a bit simpler
1260	 * if we use the property that status codes are bitwise
1261	 * distinct.
1262	 *
1263	 * The pecking order is
1264	 *
1265	 * T_SUCCESS ..... best
1266	 * T_PARTSUCCESS
1267	 * T_FAILURE
1268	 * T_READONLY
1269	 * T_NOTSUPPORT... worst
1270	 */
1271	if (status == current_worst_status)
1272		return (current_worst_status);
1273	switch (current_worst_status) {
1274	case T_SUCCESS:
1275		if (status == T_PARTSUCCESS)
1276			return (T_PARTSUCCESS);
1277		/* FALLTHROUGH */
1278	case T_PARTSUCCESS:
1279		if (status == T_FAILURE)
1280			return (T_FAILURE);
1281		/* FALLTHROUGH */
1282	case T_FAILURE:
1283		if (status == T_READONLY)
1284			return (T_READONLY);
1285		/* FALLTHROUGH */
1286	case T_READONLY:
1287		if (status == T_NOTSUPPORT)
1288			return (T_NOTSUPPORT);
1289		/* FALLTHROUGH */
1290	case T_NOTSUPPORT:
1291	default:
1292		return (current_worst_status);
1293	}
1294}
1295
1296static int
1297do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
1298    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
1299{
1300	pfi_t	deffn = dbobjp->odb_deffn;
1301	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
1302	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
1303	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
1304
1305	struct T_opthdr *topth;
1306	opdes_t *optd;
1307
1308	if (reqopt->name != T_ALLOPT) {
1309		/*
1310		 * lookup the option in the table and fill default value
1311		 */
1312		optd = opt_chk_lookup(reqopt->level, reqopt->name,
1313		    opt_arr, opt_arr_cnt);
1314
1315		if (optd == NULL) {
1316			/*
1317			 * not found - fail this one. Should not happen
1318			 * for topmost_tpiprovider as calling routine
1319			 * should have verified it.
1320			 */
1321			ASSERT(!topmost_tpiprovider);
1322			return (-1);
1323		}
1324
1325		topth = (struct T_opthdr *)(*resptrp);
1326		topth->level = reqopt->level;
1327		topth->name = reqopt->name;
1328		topth->status = reqopt->status;
1329
1330		*worst_statusp = get_worst_status(reqopt->status,
1331		    *worst_statusp);
1332
1333		if (optd->opdes_props & OP_NODEFAULT) {
1334			/* header only, no default "value" part */
1335			topth->len = sizeof (struct T_opthdr);
1336			*resptrp += sizeof (struct T_opthdr);
1337		} else {
1338			int deflen;
1339
1340			if (optd->opdes_props & OP_DEF_FN) {
1341				deflen = (*deffn)(q, reqopt->level,
1342				    reqopt->name, _TPI_TOPT_DATA(topth));
1343				if (deflen >= 0) {
1344					topth->len = (t_uscalar_t)
1345					    (sizeof (struct T_opthdr) + deflen);
1346				} else {
1347					/*
1348					 * return error, this should 'pass
1349					 * through' the option and maybe some
1350					 * other level will fill it in or
1351					 * already did.
1352					 * (No change in 'resptrp' upto here)
1353					 */
1354					return (-1);
1355				}
1356			} else {
1357				/* fill length and value part */
1358				switch (optd->opdes_size) {
1359				/*
1360				 * Since options are guaranteed aligned only
1361				 * on a 4 byte boundary (t_scalar_t) any
1362				 * option that is greater in size will default
1363				 * to the bcopy below
1364				 */
1365				case sizeof (int32_t):
1366					*(int32_t *)_TPI_TOPT_DATA(topth) =
1367					    (int32_t)optd->opdes_default;
1368					break;
1369				case sizeof (int16_t):
1370					*(int16_t *)_TPI_TOPT_DATA(topth) =
1371					    (int16_t)optd->opdes_default;
1372					break;
1373				case sizeof (int8_t):
1374					*(int8_t *)_TPI_TOPT_DATA(topth) =
1375					    (int8_t)optd->opdes_default;
1376					break;
1377				default:
1378					/*
1379					 * other length but still assume
1380					 * fixed - use bcopy
1381					 */
1382					bcopy(optd->opdes_defbuf,
1383					    _TPI_TOPT_DATA(topth),
1384					    optd->opdes_size);
1385					break;
1386				}
1387				topth->len = (t_uscalar_t)(optd->opdes_size +
1388				    sizeof (struct T_opthdr));
1389			}
1390			*resptrp += _TPI_ALIGN_TOPT(topth->len);
1391		}
1392		return (0);	/* OK return */
1393	}
1394
1395	/*
1396	 * T_ALLOPT processing
1397	 *
1398	 * lookup and stuff default values of all the options of the
1399	 * level specified
1400	 * Note: This expansion of T_ALLOPT should happen in
1401	 * a topmost_tpiprovider.
1402	 */
1403	ASSERT(topmost_tpiprovider);
1404	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
1405		if (reqopt->level != optd->opdes_level)
1406			continue;
1407		/*
1408		 *
1409		 * T_DEFAULT semantics:
1410		 * XXX: we interpret T_DEFAULT semantics such that access to
1411		 * read value is required for access even the default value.
1412		 * Else option is ignored for T_ALLOPT request.
1413		 */
1414		if (!OA_READ_PERMISSION(optd, cr))
1415			/* skip this one */
1416			continue;
1417
1418		/*
1419		 * Found option of same level as T_ALLOPT request
1420		 * that we can return.
1421		 */
1422
1423		topth = (struct T_opthdr *)(*resptrp);
1424		topth->level = optd->opdes_level;
1425		topth->name = optd->opdes_name;
1426
1427		/*
1428		 * T_DEFAULT semantics:
1429		 * We know that read access is set. If no other access is set,
1430		 * then status is T_READONLY
1431		 */
1432		if (OA_READONLY_PERMISSION(optd, cr)) {
1433			topth->status = T_READONLY;
1434			*worst_statusp = get_worst_status(T_READONLY,
1435			    *worst_statusp);
1436		} else {
1437			topth->status = T_SUCCESS;
1438			/*
1439			 * Note: *worst_statusp has to be T_SUCCESS or
1440			 * worse so no need to adjust
1441			 */
1442		}
1443
1444		if (optd->opdes_props & OP_NODEFAULT) {
1445			/* header only, no value part */
1446			topth->len = sizeof (struct T_opthdr);
1447			*resptrp += sizeof (struct T_opthdr);
1448		} else {
1449			int deflen;
1450
1451			if (optd->opdes_props & OP_DEF_FN) {
1452				deflen = (*deffn)(q, reqopt->level,
1453				    reqopt->name, _TPI_TOPT_DATA(topth));
1454				if (deflen >= 0) {
1455					topth->len = (t_uscalar_t)(deflen +
1456					    sizeof (struct T_opthdr));
1457				} else {
1458					/*
1459					 * deffn failed.
1460					 * return just the header as T_ALLOPT
1461					 * expansion.
1462					 * Some other level deffn may
1463					 * supply value part.
1464					 */
1465					topth->len = sizeof (struct T_opthdr);
1466					topth->status = T_FAILURE;
1467					*worst_statusp =
1468					    get_worst_status(T_FAILURE,
1469					    *worst_statusp);
1470				}
1471			} else {
1472				/*
1473				 * fill length and value part from
1474				 * table
1475				 */
1476				switch (optd->opdes_size) {
1477				/*
1478				 * Since options are guaranteed aligned only
1479				 * on a 4 byte boundary (t_scalar_t) any
1480				 * option that is greater in size will default
1481				 * to the bcopy below
1482				 */
1483				case sizeof (int32_t):
1484					*(int32_t *)_TPI_TOPT_DATA(topth) =
1485					    (int32_t)optd->opdes_default;
1486					break;
1487				case sizeof (int16_t):
1488					*(int16_t *)_TPI_TOPT_DATA(topth) =
1489					    (int16_t)optd->opdes_default;
1490					break;
1491				case sizeof (int8_t):
1492					*(int8_t *)_TPI_TOPT_DATA(topth) =
1493					    (int8_t)optd->opdes_default;
1494					break;
1495				default:
1496					/*
1497					 * other length but still assume
1498					 * fixed - use bcopy
1499					 */
1500					bcopy(optd->opdes_defbuf,
1501					    _TPI_TOPT_DATA(topth),
1502					    optd->opdes_size);
1503				}
1504				topth->len = (t_uscalar_t)(optd->opdes_size +
1505				    sizeof (struct T_opthdr));
1506			}
1507			*resptrp += _TPI_ALIGN_TOPT(topth->len);
1508		}
1509	}
1510	return (0);
1511}
1512
1513static void
1514do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
1515    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
1516{
1517	pfi_t	getfn = dbobjp->odb_getfn;
1518	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
1519	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
1520	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
1521
1522	struct T_opthdr *topth;
1523	opdes_t *optd;
1524	int optlen;
1525	uchar_t *initptr = *resptrp;
1526
1527	/*
1528	 * We call getfn to get the current value of an option. The call may
1529	 * fail in which case we copy the values from the input buffer. Maybe
1530	 * something downstream will fill it in or something upstream did.
1531	 */
1532
1533	if (reqopt->name != T_ALLOPT) {
1534		topth = (struct T_opthdr *)*resptrp;
1535		*resptrp += sizeof (struct T_opthdr);
1536		optlen = (*getfn)(q, reqopt->level, reqopt->name, *resptrp);
1537		if (optlen >= 0) {
1538			topth->len = (t_uscalar_t)(optlen +
1539			    sizeof (struct T_opthdr));
1540			topth->level = reqopt->level;
1541			topth->name = reqopt->name;
1542			topth->status = reqopt->status;
1543			*resptrp += _TPI_ALIGN_TOPT(optlen);
1544			*worst_statusp = get_worst_status(topth->status,
1545			    *worst_statusp);
1546		} else {
1547			/* failed - reset "*resptrp" pointer */
1548			*resptrp -= sizeof (struct T_opthdr);
1549		}
1550	} else {		/* T_ALLOPT processing */
1551		ASSERT(topmost_tpiprovider == B_TRUE);
1552		/* scan and get all options */
1553		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
1554			/* skip other levels */
1555			if (reqopt->level != optd->opdes_level)
1556				continue;
1557
1558			if (!OA_READ_PERMISSION(optd, cr))
1559				/* skip this one */
1560				continue;
1561
1562			topth = (struct T_opthdr *)*resptrp;
1563			*resptrp += sizeof (struct T_opthdr);
1564
1565			/* get option of this level */
1566			optlen = (*getfn)(q, reqopt->level, optd->opdes_name,
1567			    *resptrp);
1568			if (optlen >= 0) {
1569				/* success */
1570				topth->len = (t_uscalar_t)(optlen +
1571				    sizeof (struct T_opthdr));
1572				topth->level = reqopt->level;
1573				topth->name = optd->opdes_name;
1574				if (OA_READONLY_PERMISSION(optd, cr))
1575					topth->status = T_READONLY;
1576				else
1577					topth->status = T_SUCCESS;
1578				*resptrp += _TPI_ALIGN_TOPT(optlen);
1579			} else {
1580				/*
1581				 * failed, return as T_FAILURE and null value
1582				 * part. Maybe something downstream will
1583				 * handle this one and fill in a value. Here
1584				 * it is just part of T_ALLOPT expansion.
1585				 */
1586				topth->len = sizeof (struct T_opthdr);
1587				topth->level = reqopt->level;
1588				topth->name = optd->opdes_name;
1589				topth->status = T_FAILURE;
1590			}
1591			*worst_statusp = get_worst_status(topth->status,
1592			    *worst_statusp);
1593		} /* end for loop */
1594	}
1595	if (*resptrp == initptr) {
1596		/*
1597		 * getfn failed and does not want to handle this option. Maybe
1598		 * something downstream will or something upstream did. (If
1599		 * topmost_tpiprovider, initialize "status" to failure which
1600		 * can possibly change downstream). Copy the input "as is" from
1601		 * input option buffer if any to maintain transparency.
1602		 */
1603		if (topmost_tpiprovider)
1604			reqopt->status = T_FAILURE;
1605		bcopy(reqopt, *resptrp, reqopt->len);
1606		*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
1607		*worst_statusp = get_worst_status(reqopt->status,
1608		    *worst_statusp);
1609	}
1610}
1611
1612
1613
1614static int
1615do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
1616    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
1617    cred_t *cr, optdb_obj_t *dbobjp, mblk_t *first_mp)
1618{
1619	pfi_t	deffn = dbobjp->odb_deffn;
1620	opt_set_fn setfn = dbobjp->odb_setfn;
1621	opdes_t	*opt_arr = dbobjp->odb_opt_des_arr;
1622	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
1623	boolean_t topmost_tpiprovider = dbobjp->odb_topmost_tpiprovider;
1624
1625	struct T_opthdr *topth;
1626	opdes_t *optd;
1627	int error;
1628	t_uscalar_t optlen;
1629	t_scalar_t optsize;
1630	uchar_t *initptr = *resptrp;
1631
1632	ASSERT(reqopt->status == T_SUCCESS);
1633
1634	if (reqopt->name != T_ALLOPT) {
1635		topth = (struct T_opthdr *)*resptrp;
1636		*resptrp += sizeof (struct T_opthdr);
1637		error = (*setfn)(q, optset_context, reqopt->level, reqopt->name,
1638		    reqopt->len - sizeof (struct T_opthdr),
1639		    _TPI_TOPT_DATA(reqopt), &optlen, _TPI_TOPT_DATA(topth),
1640		    NULL, cr, first_mp);
1641		if (error) {
1642			/* failed - reset "*resptrp" */
1643			*resptrp -= sizeof (struct T_opthdr);
1644			if (error == EINPROGRESS)
1645				return (error);
1646		} else {
1647			/*
1648			 * success - "value" already filled in setfn()
1649			 */
1650			topth->len = (t_uscalar_t)(optlen +
1651			    sizeof (struct T_opthdr));
1652			topth->level = reqopt->level;
1653			topth->name = reqopt->name;
1654			topth->status = reqopt->status;
1655			*resptrp += _TPI_ALIGN_TOPT(optlen);
1656			*worst_statusp = get_worst_status(topth->status,
1657			    *worst_statusp);
1658		}
1659	} else {		/* T_ALLOPT processing */
1660		/* only for T_NEGOTIATE case */
1661		ASSERT(optset_context == SETFN_OPTCOM_NEGOTIATE);
1662		ASSERT(topmost_tpiprovider == B_TRUE);
1663
1664		/* scan and set all options to default value */
1665		for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
1666
1667			/* skip other levels */
1668			if (reqopt->level != optd->opdes_level)
1669				continue;
1670
1671			if (OA_EXECUTE_PERMISSION(optd, cr) ||
1672			    OA_NO_PERMISSION(optd, cr)) {
1673				/*
1674				 * skip this one too. Does not make sense to
1675				 * set anything to default value for "execute"
1676				 * options.
1677				 */
1678				continue;
1679			}
1680
1681			if (OA_READONLY_PERMISSION(optd, cr)) {
1682				/*
1683				 * Return with T_READONLY status (and no value
1684				 * part). Note: spec is not clear but
1685				 * XTI test suite needs this.
1686				 */
1687				topth = (struct T_opthdr *)*resptrp;
1688				topth->len = sizeof (struct T_opthdr);
1689				*resptrp += topth->len;
1690				topth->level = reqopt->level;
1691				topth->name = optd->opdes_name;
1692				topth->status = T_READONLY;
1693				*worst_statusp = get_worst_status(topth->status,
1694				    *worst_statusp);
1695				continue;
1696			}
1697
1698			/*
1699			 * It is not read only or execute type
1700			 * the it must have write permission
1701			 */
1702			ASSERT(OA_WRITE_PERMISSION(optd, cr));
1703
1704			topth = (struct T_opthdr *)*resptrp;
1705			*resptrp += sizeof (struct T_opthdr);
1706
1707			topth->len = sizeof (struct T_opthdr);
1708			topth->level = reqopt->level;
1709			topth->name = optd->opdes_name;
1710			if (optd->opdes_props & OP_NODEFAULT) {
1711				/*
1712				 * Option of "no default value" so it does not
1713				 * make sense to try to set it. We just return
1714				 * header with status of T_SUCCESS
1715				 * XXX should this be failure ?
1716				 */
1717				topth->status = T_SUCCESS;
1718				continue; /* skip setting */
1719			}
1720			if (optd->opdes_props & OP_DEF_FN) {
1721				if ((optd->opdes_props & OP_VARLEN) ||
1722				    ((optsize = (*deffn)(q, reqopt->level,
1723				    optd->opdes_name,
1724				    (uchar_t *)optd->opdes_defbuf)) < 0)) {
1725					/* XXX - skip these too */
1726					topth->status = T_SUCCESS;
1727					continue; /* skip setting */
1728				}
1729			} else {
1730				optsize = optd->opdes_size;
1731			}
1732
1733
1734			/* set option of this level */
1735			error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
1736			    reqopt->level, optd->opdes_name, optsize,
1737			    (uchar_t *)optd->opdes_defbuf, &optlen,
1738			    _TPI_TOPT_DATA(topth), NULL, cr, NULL);
1739			if (error) {
1740				/*
1741				 * failed, return as T_FAILURE and null value
1742				 * part. Maybe something downstream will
1743				 * handle this one and fill in a value. Here
1744				 * it is just part of T_ALLOPT expansion.
1745				 */
1746				topth->status = T_FAILURE;
1747				*worst_statusp = get_worst_status(topth->status,
1748				    *worst_statusp);
1749			} else {
1750				/* success */
1751				topth->len += optlen;
1752				topth->status = T_SUCCESS;
1753				*resptrp += _TPI_ALIGN_TOPT(optlen);
1754			}
1755		} /* end for loop */
1756		/* END T_ALLOPT */
1757	}
1758
1759	if (*resptrp == initptr) {
1760		/*
1761		 * setfn failed and does not want to handle this option. Maybe
1762		 * something downstream will or something upstream
1763		 * did. Copy the input as is from input option buffer if any to
1764		 * maintain transparency (maybe something at a level above
1765		 * did something.
1766		 */
1767		if (topmost_tpiprovider)
1768			reqopt->status = T_FAILURE;
1769		bcopy(reqopt, *resptrp, reqopt->len);
1770		*resptrp += _TPI_ALIGN_TOPT(reqopt->len);
1771		*worst_statusp = get_worst_status(reqopt->status,
1772		    *worst_statusp);
1773	}
1774	return (0);
1775}
1776
1777/*
1778 * The following routines process options buffer passed with
1779 * T_CONN_REQ, T_CONN_RES and T_UNITDATA_REQ.
1780 * This routine does the consistency check applied to the
1781 * sanity of formatting of multiple options packed in the
1782 * buffer.
1783 *
1784 * XTI brain damage alert:
1785 * XTI interface adopts the notion of an option being an
1786 * "absolute requirement" from OSI transport service (but applies
1787 * it to all transports including Internet transports).
1788 * The main effect of that is action on failure to "negotiate" a
1789 * requested option to the exact requested value
1790 *
1791 *          - if the option is an "absolute requirement", the primitive
1792 *            is aborted (e.g T_DISCON_REQ or T_UDERR generated)
 *          - if the option is NOT an "absolute requirement" it can
1794 *            just be ignored.
1795 *
 * We would not support "negotiating" of options on connection
 * primitives for Internet transports. However, just in case we are
 * forced to in order to pass strange test suites, the design here
 * tries to support these notions.
1800 *
1801 * tpi_optcom_buf(q, mp, opt_lenp, opt_offset, cred, dbobjp, thisdg_attrs,
1802 *	*is_absreq_failurep)
1803 *
1804 * - Verify the option buffer, if formatted badly, return error 1
1805 *
1806 * - If it is a "permissions" failure (read-only), return error 2
1807 *
1808 * - Else, process the option "in place", the following can happen,
1809 *	     - if a "privileged" option, mark it as "ignored".
1810 *	     - if "not supported", mark "ignored"
1811 *	     - if "supported" attempt negotiation and fill result in
1812 *	       the outcome
1813 *			- if "absolute requirement", set "*is_absreq_failurep"
1814 *			- if NOT an "absolute requirement", then our
 *			  interpretation is to mark it as ignored if
1816 *			  negotiation fails (Spec allows partial success
1817 *			  as in OSI protocols but not failure)
1818 *
1819 *   Then delete "ignored" options from option buffer and return success.
1820 *
1821 */
1822
1823int
1824tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
1825    t_scalar_t opt_offset, cred_t *cr, optdb_obj_t *dbobjp,
1826    void *thisdg_attrs, int *is_absreq_failurep)
1827{
1828	opt_set_fn setfn = dbobjp->odb_setfn;
1829	opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
1830	uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
1831	struct T_opthdr *opt, *opt_start, *opt_end;
1832	mblk_t  *copy_mp_head;
1833	uchar_t *optr, *init_optr;
1834	opdes_t *optd;
1835	uint_t optset_context;
1836	t_uscalar_t olen;
1837	int error = 0;
1838
1839	ASSERT((uchar_t *)opt_lenp > mp->b_rptr &&
1840	    (uchar_t *)opt_lenp < mp->b_wptr);
1841
1842	copy_mp_head = NULL;
1843	*is_absreq_failurep = 0;
1844	switch (((union T_primitives *)mp->b_rptr)->type) {
1845	case T_CONN_REQ:
1846	case T_CONN_RES:
1847		optset_context = SETFN_CONN_NEGOTIATE;
1848		break;
1849	case T_UNITDATA_REQ:
1850		optset_context = SETFN_UD_NEGOTIATE;
1851		break;
1852	default:
1853		/*
1854		 * should never get here, all possible TPI primitives
1855		 * where this can be called from should be accounted
1856		 * for in the cases above
1857		 */
1858		return (EINVAL);
1859	}
1860
1861	if ((opt_start = (struct T_opthdr *)
1862	    mi_offset_param(mp, opt_offset, *opt_lenp)) == NULL) {
1863		error = ENOPROTOOPT;
1864		goto error_ret;
1865	}
1866	if (!__TPI_TOPT_ISALIGNED(opt_start)) {
1867		error = ENOPROTOOPT;
1868		goto error_ret;
1869	}
1870
1871	opt_end = (struct T_opthdr *)((uchar_t *)opt_start
1872	    + *opt_lenp);
1873
1874	if ((copy_mp_head = copyb(mp)) == (mblk_t *)NULL) {
1875		error = ENOMEM;
1876		goto error_ret;
1877	}
1878
1879	init_optr = optr = (uchar_t *)&copy_mp_head->b_rptr[opt_offset];
1880
1881	for (opt = opt_start; opt && (opt < opt_end);
1882	    opt = _TPI_TOPT_NEXTHDR(opt_start, *opt_lenp, opt)) {
1883		/*
1884		 * Validate the option for length and alignment
1885		 * before accessing anything in it
1886		 */
1887		if (!_TPI_TOPT_VALID(opt, opt_start, opt_end)) {
1888			error = ENOPROTOOPT;
1889			goto error_ret;
1890		}
1891
1892		/* Find the option in the opt_arr. */
1893		optd = opt_chk_lookup(opt->level, opt->name,
1894		    opt_arr, opt_arr_cnt);
1895
1896		if (optd == NULL) {
1897			/*
1898			 * Option not found
1899			 */
1900			opt->status = T_NOTSUPPORT;
1901			continue;
1902		}
1903
1904		/*
1905		 * Weird but as in XTI spec.
1906		 * Sec 6.3.6 "Privileged and ReadOnly Options"
1907		 * Permission problems (e.g.readonly) fail with bad access
1908		 * BUT "privileged" option request from those NOT PRIVILEGED
1909		 * are to be merely "ignored".
1910		 * XXX Prevents "probing" of privileged options ?
1911		 */
1912		if (OA_READONLY_PERMISSION(optd, cr)) {
1913			error = EACCES;
1914			goto error_ret;
1915		}
1916		if (OA_MATCHED_PRIV(optd, cr)) {
1917			/*
1918			 * For privileged options, we DO perform
1919			 * access checks as is common sense
1920			 */
1921			if (!OA_WX_ANYPRIV(optd)) {
1922				error = EACCES;
1923				goto error_ret;
1924			}
1925		} else {
1926			/*
1927			 * For non privileged, we fail instead following
1928			 * "ignore" semantics dictated by XTI spec for
1929			 * permissions problems.
1930			 * Sec 6.3.6 "Privileged and ReadOnly Options"
1931			 * XXX Should we do "ignore" semantics ?
1932			 */
1933			if (!OA_WX_NOPRIV(optd)) { /* nopriv */
1934				opt->status = T_FAILURE;
1935				continue;
1936			}
1937		}
1938		/*
1939		 *
1940		 * If the negotiation fails, for options that
1941		 * are "absolute requirement", it is a fatal error.
1942		 * For options that are NOT "absolute requirements",
1943		 * and the value fails to negotiate, the XTI spec
1944		 * only considers the possibility of partial success
1945		 * (T_PARTSUCCES - not likely for Internet protocols).
1946		 * The spec is in denial about complete failure
1947		 * (T_FAILURE) to negotiate for options that are
1948		 * carried on T_CONN_REQ/T_CONN_RES/T_UNITDATA
1949		 * We interpret the T_FAILURE to negotiate an option
1950		 * that is NOT an absolute requirement that it is safe
1951		 * to ignore it.
1952		 */
1953
1954		/* verify length */
1955		if (!opt_length_ok(optd, opt)) {
1956			/* bad size */
1957			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
1958				/* option is absolute requirement */
1959				*is_absreq_failurep = 1;
1960				error = EINVAL;
1961				goto error_ret;
1962			}
1963			opt->status = T_FAILURE;
1964			continue;
1965		}
1966
1967		/*
1968		 * verified generic attributes. Now call set function.
1969		 * Note: We assume the following to simplify code.
1970		 * XXX If this is found not to be valid, this routine
1971		 * will need to be rewritten. At this point it would
1972		 * be premature to introduce more complexity than is
1973		 * needed.
1974		 * Assumption: For variable length options, we assume
1975		 * that the value returned will be same or less length
1976		 * (size does not increase). This makes it OK to pass the
1977		 * same space for output as it is on input.
1978		 */
1979
1980		error = (*setfn)(q, optset_context, opt->level, opt->name,
1981		    opt->len - (t_uscalar_t)sizeof (struct T_opthdr),
1982		    _TPI_TOPT_DATA(opt), &olen, _TPI_TOPT_DATA(opt),
1983		    thisdg_attrs, cr, NULL);
1984
1985		if (olen > (int)(opt->len - sizeof (struct T_opthdr))) {
1986			/*
1987			 * Space on output more than space on input. Should
1988			 * not happen and we consider it a bug/error.
1989			 * More of a restriction than an error in our
1990			 * implementation. Will see if we can live with this
1991			 * otherwise code will get more hairy with multiple
1992			 * passes.
1993			 */
1994			error = EINVAL;
1995			goto error_ret;
1996		}
1997		if (error != 0) {
1998			if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
1999				/* option is absolute requirement. */
2000				*is_absreq_failurep = 1;
2001				goto error_ret;
2002			}
2003			/*
2004			 * failed - but option "not an absolute
2005			 * requirement"
2006			 */
2007			opt->status = T_FAILURE;
2008			continue;
2009		}
2010		/*
2011		 * Fill in the only possible successful result
2012		 * (Note: TPI allows for T_PARTSUCCESS - partial
2013		 * sucess result code which is relevant in OSI world
2014		 * and not possible in Internet code)
2015		 */
2016		opt->status = T_SUCCESS;
2017
2018		/*
2019		 * Add T_SUCCESS result code options to the "output" options.
2020		 * No T_FAILURES or T_NOTSUPPORT here as they are to be
2021		 * ignored.
2022		 * This code assumes output option buffer will
2023		 * be <= input option buffer.
2024		 *
2025		 * Copy option header+value
2026		 */
2027		bcopy(opt, optr, opt->len);
2028		optr +=  _TPI_ALIGN_TOPT(opt->len);
2029	}
2030	/*
2031	 * Overwrite the input mblk option buffer now with the output
2032	 * and update length, and contents in original mbl
2033	 * (offset remains unchanged).
2034	 */
2035	*opt_lenp = (t_scalar_t)(optr - init_optr);
2036	if (*opt_lenp > 0) {
2037		bcopy(init_optr, opt_start, *opt_lenp);
2038	}
2039
2040error_ret:
2041	if (copy_mp_head != NULL)
2042		freeb(copy_mp_head);
2043	return (error);
2044}
2045
2046static opdes_t *
2047opt_chk_lookup(t_uscalar_t level, t_uscalar_t name, opdes_t *opt_arr,
2048    uint_t opt_arr_cnt)
2049{
2050	opdes_t		*optd;
2051
2052	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt];
2053	    optd++) {
2054		if (level == (uint_t)optd->opdes_level &&
2055		    name == (uint_t)optd->opdes_name)
2056			return (optd);
2057	}
2058	return (NULL);
2059}
2060
2061static boolean_t
2062opt_level_valid(t_uscalar_t level, optlevel_t *valid_level_arr,
2063    uint_t valid_level_arr_cnt)
2064{
2065	optlevel_t		*olp;
2066
2067	for (olp = valid_level_arr;
2068	    olp < &valid_level_arr[valid_level_arr_cnt];
2069	    olp++) {
2070		if (level == (uint_t)(*olp))
2071			return (B_TRUE);
2072	}
2073	return (B_FALSE);
2074}
2075
2076
2077/*
2078 * Compute largest possible size for an option buffer containing
2079 * all options in one buffer.
2080 *
2081 * XXX TBD, investigate use of opt_bloated_maxsize() to avoid
2082 *     wastefully large buffer allocation.
2083 */
2084static size_t
2085opt_level_allopts_lengths(t_uscalar_t level, opdes_t *opt_arr,
2086    uint_t opt_arr_cnt)
2087{
2088	opdes_t		*optd;
2089	size_t allopt_len = 0;	/* 0 implies no option at this level */
2090
2091	/*
2092	 * Scan opt_arr computing aggregate length
2093	 * requirement for storing values of all
2094	 * options.
2095	 * Note: we do not filter for permissions
2096	 * etc. This will be >= the real aggregate
2097	 * length required (upper bound).
2098	 */
2099
2100	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt];
2101	    optd++) {
2102		if (level == optd->opdes_level) {
2103			allopt_len += sizeof (struct T_opthdr) +
2104			    _TPI_ALIGN_TOPT(optd->opdes_size);
2105		}
2106	}
2107	return (allopt_len);	/* 0 implies level not found */
2108}
2109
2110/*
2111 * Compute largest possible size for an option buffer containing
2112 * all options in one buffer - a (theoretical?) worst case scenario
2113 * for certain cases.
2114 */
2115t_uscalar_t
2116optcom_max_optbuf_len(opdes_t *opt_arr, uint_t opt_arr_cnt)
2117{
2118	t_uscalar_t max_optbuf_len = sizeof (struct T_info_ack);
2119	opdes_t		*optd;
2120
2121	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
2122		max_optbuf_len += (t_uscalar_t)sizeof (struct T_opthdr) +
2123		    (t_uscalar_t)_TPI_ALIGN_TOPT(optd->opdes_size);
2124	}
2125	return (max_optbuf_len);
2126}
2127
2128/*
2129 * Compute largest possible size for OPT_size for a transport.
2130 * Heuristic used is to add all but certain extremely large
2131 * size options; this is done by calling opt_bloated_maxsize().
2132 * It affects user level allocations in TLI/XTI code using t_alloc()
2133 * and other TLI/XTI implementation instance strucutures.
2134 * The large size options excluded are presumed to be
2135 * never accessed through the (theoretical?) worst case code paths
2136 * through TLI/XTI as they are currently IPv6 specific options.
2137 */
2138
2139t_uscalar_t
2140optcom_max_optsize(opdes_t *opt_arr, uint_t opt_arr_cnt)
2141{
2142	t_uscalar_t max_optbuf_len = sizeof (struct T_info_ack);
2143	opdes_t		*optd;
2144
2145	for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
2146		if (!opt_bloated_maxsize(optd)) {
2147			max_optbuf_len +=
2148			    (t_uscalar_t)sizeof (struct T_opthdr) +
2149			    (t_uscalar_t)_TPI_ALIGN_TOPT(optd->opdes_size);
2150		}
2151	}
2152	return (max_optbuf_len);
2153}
2154
2155/*
2156 * The theoretical model used in optcom_max_optsize() and
2157 * opt_level_allopts_lengths() accounts for the worst case of all
2158 * possible options for the theoretical cases and results in wasteful
2159 * memory allocations for certain theoretically correct usage scenarios.
2160 * In practice, the "features" they support are rarely, if ever,
2161 * used and even then only by test suites for those features (VSU, VST).
2162 * However, they result in large allocations due to the increased transport
2163 * T_INFO_ACK OPT_size field affecting t_alloc() users and TLI/XTI library
2164 * instance data structures for applications.
2165 *
2166 * The following routine opt_bloated_maxsize() supports a hack that avoids
2167 * paying the tax for the bloated options by excluding them and pretending
2168 * they don't exist for certain features without affecting features that
2169 * do use them.
2170 *
2171 * XXX Currently implemented only for optcom_max_optsize()
2172 *     (to reduce risk late in release).
2173 *     TBD for future, investigate use in optcom_level_allopts_lengths() and
2174 *     all the instances of T_ALLOPT processing to exclude "bloated options".
2175 *     Will not affect VSU/VST tests as they do not test with IPPROTO_IPV6
2176 *     level options which are the only ones that fit the "bloated maxsize"
2177 *     option profile now.
2178 */
2179static boolean_t
2180opt_bloated_maxsize(opdes_t *optd)
2181{
2182	if (optd->opdes_level != IPPROTO_IPV6)
2183		return (B_FALSE);
2184	switch (optd->opdes_name) {
2185	case IPV6_HOPOPTS:
2186	case IPV6_DSTOPTS:
2187	case IPV6_RTHDRDSTOPTS:
2188	case IPV6_RTHDR:
2189	case IPV6_PATHMTU:
2190		return (B_TRUE);
2191	default:
2192		break;
2193	}
2194	return (B_FALSE);
2195}
2196
2197static boolean_t
2198opt_length_ok(opdes_t *optd, struct T_opthdr *opt)
2199{
2200	/*
2201	 * Verify length.
2202	 * Value specified should match length of fixed length option or be
2203	 * less than maxlen of variable length option.
2204	 */
2205	if (optd->opdes_props & OP_VARLEN) {
2206		if (opt->len <= optd->opdes_size +
2207		    (t_uscalar_t)sizeof (struct T_opthdr))
2208			return (B_TRUE);
2209	} else {
2210		/* fixed length option */
2211		if (opt->len == optd->opdes_size +
2212		    (t_uscalar_t)sizeof (struct T_opthdr))
2213			return (B_TRUE);
2214	}
2215	return (B_FALSE);
2216}
2217
2218/*
2219 * This routine appends a pssed in hop-by-hop option to the existing
2220 * option (in this case a cipso label encoded in HOPOPT option). The
2221 * passed in option is always padded. The 'reservelen' is the
2222 * length of reserved data (label). New memory will be allocated if
2223 * the current buffer is not large enough. Return failure if memory
2224 * can not be allocated.
2225 */
2226int
2227optcom_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky,
2228    uchar_t **optbufp, uint_t *optlenp, uint_t reservelen)
2229{
2230	uchar_t *optbuf;
2231	uchar_t	*optp;
2232
2233	if (!sticky) {
2234		*optbufp = invalp;
2235		*optlenp = inlen;
2236		return (0);
2237	}
2238
2239	if (inlen == *optlenp - reservelen) {
2240		/* Unchanged length - no need to reallocate */
2241		optp = *optbufp + reservelen;
2242		bcopy(invalp, optp, inlen);
2243		if (reservelen != 0) {
2244			/*
2245			 * Convert the NextHeader and Length of the
2246			 * passed in hop-by-hop header to pads
2247			 */
2248			optp[0] = IP6OPT_PADN;
2249			optp[1] = 0;
2250		}
2251		return (0);
2252	}
2253	if (inlen + reservelen > 0) {
2254		/* Allocate new buffer before free */
2255		optbuf = kmem_alloc(inlen + reservelen, KM_NOSLEEP);
2256		if (optbuf == NULL)
2257			return (ENOMEM);
2258	} else {
2259		optbuf = NULL;
2260	}
2261
2262	/* Copy out old reserved data (label) */
2263	if (reservelen > 0)
2264		bcopy(*optbufp, optbuf, reservelen);
2265
2266	/* Free old buffer */
2267	if (*optlenp != 0)
2268		kmem_free(*optbufp, *optlenp);
2269
2270	if (inlen > 0)
2271		bcopy(invalp, optbuf + reservelen, inlen);
2272
2273	if (reservelen != 0) {
2274		/*
2275		 * Convert the NextHeader and Length of the
2276		 * passed in hop-by-hop header to pads
2277		 */
2278		optbuf[reservelen] = IP6OPT_PADN;
2279		optbuf[reservelen + 1] = 0;
2280		/*
2281		 * Set the Length of the hop-by-hop header, number of 8
2282		 * byte-words following the 1st 8 bytes
2283		 */
2284		optbuf[1] = (reservelen + inlen - 1) >> 3;
2285	}
2286	*optbufp = optbuf;
2287	*optlenp = inlen + reservelen;
2288	return (0);
2289}
2290