1/*
2 * lib/nl.c		Core Netlink Interface
3 *
4 *	This library is free software; you can redistribute it and/or
5 *	modify it under the terms of the GNU Lesser General Public
6 *	License as published by the Free Software Foundation version 2.1
7 *	of the License.
8 *
9 * Copyright (c) 2003-2008 Thomas Graf <tgraf@suug.ch>
10 */
11
12/**
13 * @defgroup core Core
14 *
15 * @details
16 * @par 1) Connecting the socket
17 * @code
18 * // Bind and connect the socket to a protocol, NETLINK_ROUTE in this example.
19 * nl_connect(sk, NETLINK_ROUTE);
20 * @endcode
21 *
22 * @par 2) Sending data
23 * @code
24 * // The most rudimentary method is to use nl_sendto() simply pushing
25 * // a piece of data to the other netlink peer. This method is not
26 * // recommended.
27 * const char buf[] = { 0x01, 0x02, 0x03, 0x04 };
28 * nl_sendto(sk, buf, sizeof(buf));
29 *
 * // A more comfortable interface is nl_send() taking a pointer to
 * // a netlink message.
 * struct nl_msg *msg = my_msg_builder();
 * nl_send(sk, msg);
34 *
 * // nl_sendmsg() provides additional control over the sendmsg() message
 * // header in order to allow more specific addressing of multiple peers etc.
 * struct msghdr hdr = { ... };
 * nl_sendmsg(sk, msg, &hdr);
39 *
 * // You're probably too lazy to fill out the netlink pid, sequence number
 * // and message flags all the time. nl_send_auto_complete() automatically
 * // extends your message header as needed with an appropriate sequence
 * // number, the netlink pid stored in the netlink socket and the message
 * // flags NLM_F_REQUEST and NLM_F_ACK (if not disabled in the socket).
 * nl_send_auto_complete(sk, msg);
46 *
 * // Simple protocols don't require the complex message construction interface
 * // and may favour nl_send_simple() to easily send a bunch of payload
 * // encapsulated in a netlink message header.
50 * nl_send_simple(sk, MY_MSG_TYPE, 0, buf, sizeof(buf));
51 * @endcode
52 *
53 * @par 3) Receiving data
54 * @code
 * // nl_recv() receives a single message allocating a buffer for the message
 * // content and gives back the pointer to you.
 * struct sockaddr_nl peer;
 * unsigned char *msg;
 * struct ucred *creds = NULL;
 * nl_recv(sk, &peer, &msg, &creds);
60 *
 * // nl_recvmsgs() receives a bunch of messages until the callback system
 * // orders it to stop, usually after receiving a complete multipart
 * // message series.
64 * nl_recvmsgs(sk, my_callback_configuration);
65 *
 * // nl_recvmsgs_default() acts just like nl_recvmsgs() but uses the callback
 * // configuration stored in the socket.
68 * nl_recvmsgs_default(sk);
69 *
 * // In case you want to wait for the ACK to be received that you requested
 * // with your latest message, you can call nl_wait_for_ack()
72 * nl_wait_for_ack(sk);
73 * @endcode
74 *
75 * @par 4) Closing
76 * @code
77 * // Close the socket first to release kernel memory
78 * nl_close(sk);
79 * @endcode
80 *
81 * @{
82 */
83
84#include <netlink-local.h>
85#include <netlink/netlink.h>
86#include <netlink/utils.h>
87#include <netlink/handlers.h>
88#include <netlink/msg.h>
89#include <netlink/attr.h>
90
91/**
92 * @name Connection Management
93 * @{
94 */
95
96/**
97 * Create and connect netlink socket.
98 * @arg sk		Netlink socket.
99 * @arg protocol	Netlink protocol to use.
100 *
101 * Creates a netlink socket using the specified protocol, binds the socket
102 * and issues a connection attempt.
103 *
104 * @return 0 on success or a negative error code.
105 */
106int nl_connect(struct nl_sock *sk, int protocol)
107{
108	int err;
109	socklen_t addrlen;
110
111	sk->s_fd = socket(AF_NETLINK, SOCK_RAW, protocol);
112	if (sk->s_fd < 0) {
113		err = -nl_syserr2nlerr(errno);
114		goto errout;
115	}
116
117	if (!(sk->s_flags & NL_SOCK_BUFSIZE_SET)) {
118		err = nl_socket_set_buffer_size(sk, 0, 0);
119		if (err < 0)
120			goto errout;
121	}
122
123	err = bind(sk->s_fd, (struct sockaddr*) &sk->s_local,
124		   sizeof(sk->s_local));
125	if (err < 0) {
126		err = -nl_syserr2nlerr(errno);
127		goto errout;
128	}
129
130	addrlen = sizeof(sk->s_local);
131	err = getsockname(sk->s_fd, (struct sockaddr *) &sk->s_local,
132			  &addrlen);
133	if (err < 0) {
134		err = -nl_syserr2nlerr(errno);
135		goto errout;
136	}
137
138	if (addrlen != sizeof(sk->s_local)) {
139		err = -NLE_NOADDR;
140		goto errout;
141	}
142
143	if (sk->s_local.nl_family != AF_NETLINK) {
144		err = -NLE_AF_NOSUPPORT;
145		goto errout;
146	}
147
148	sk->s_proto = protocol;
149
150	return 0;
151errout:
152	close(sk->s_fd);
153	sk->s_fd = -1;
154
155	return err;
156}
157
158/**
159 * Close/Disconnect netlink socket.
160 * @arg sk		Netlink socket.
161 */
162void nl_close(struct nl_sock *sk)
163{
164	if (sk->s_fd >= 0) {
165		close(sk->s_fd);
166		sk->s_fd = -1;
167	}
168
169	sk->s_proto = 0;
170}
171
172/** @} */
173
174/**
175 * @name Send
176 * @{
177 */
178
179/**
180 * Send raw data over netlink socket.
181 * @arg sk		Netlink socket.
182 * @arg buf		Data buffer.
183 * @arg size		Size of data buffer.
184 * @return Number of characters written on success or a negative error code.
185 */
186int nl_sendto(struct nl_sock *sk, void *buf, size_t size)
187{
188	int ret;
189
190	ret = sendto(sk->s_fd, buf, size, 0, (struct sockaddr *)
191		     &sk->s_peer, sizeof(sk->s_peer));
192	if (ret < 0)
193		return -nl_syserr2nlerr(errno);
194
195	return ret;
196}
197
198/**
199 * Send netlink message with control over sendmsg() message header.
200 * @arg sk		Netlink socket.
201 * @arg msg		Netlink message to be sent.
202 * @arg hdr		Sendmsg() message header.
203 * @return Number of characters sent on sucess or a negative error code.
204 */
205int nl_sendmsg(struct nl_sock *sk, struct nl_msg *msg, struct msghdr *hdr)
206{
207	struct nl_cb *cb;
208	int ret;
209
210	struct iovec iov = {
211		.iov_base = (void *) nlmsg_hdr(msg),
212		.iov_len = nlmsg_hdr(msg)->nlmsg_len,
213	};
214
215	hdr->msg_iov = &iov;
216	hdr->msg_iovlen = 1;
217
218	nlmsg_set_src(msg, &sk->s_local);
219
220	cb = sk->s_cb;
221	if (cb->cb_set[NL_CB_MSG_OUT])
222		if (nl_cb_call(cb, NL_CB_MSG_OUT, msg) != NL_OK)
223			return 0;
224
225	ret = sendmsg(sk->s_fd, hdr, 0);
226	if (ret < 0)
227		return -nl_syserr2nlerr(errno);
228
229	return ret;
230}
231
232
233/**
234 * Send netlink message.
235 * @arg sk		Netlink socket.
236 * @arg msg		Netlink message to be sent.
237 * @see nl_sendmsg()
238 * @return Number of characters sent on success or a negative error code.
239 */
240int nl_send(struct nl_sock *sk, struct nl_msg *msg)
241{
242	struct sockaddr_nl *dst;
243	struct ucred *creds;
244
245	struct msghdr hdr = {
246		.msg_name = (void *) &sk->s_peer,
247		.msg_namelen = sizeof(struct sockaddr_nl),
248	};
249
250	/* Overwrite destination if specified in the message itself, defaults
251	 * to the peer address of the socket.
252	 */
253	dst = nlmsg_get_dst(msg);
254	if (dst->nl_family == AF_NETLINK)
255		hdr.msg_name = dst;
256
257	/* Add credentials if present. */
258	creds = nlmsg_get_creds(msg);
259	if (creds != NULL) {
260		char buf[CMSG_SPACE(sizeof(struct ucred))];
261		struct cmsghdr *cmsg;
262
263		hdr.msg_control = buf;
264		hdr.msg_controllen = sizeof(buf);
265
266		cmsg = CMSG_FIRSTHDR(&hdr);
267		cmsg->cmsg_level = SOL_SOCKET;
268		cmsg->cmsg_type = SCM_CREDENTIALS;
269		cmsg->cmsg_len = CMSG_LEN(sizeof(struct ucred));
270		memcpy(CMSG_DATA(cmsg), creds, sizeof(struct ucred));
271	}
272
273	return nl_sendmsg(sk, msg, &hdr);
274}
275
276/**
277 * Send netlink message and check & extend header values as needed.
278 * @arg sk		Netlink socket.
279 * @arg msg		Netlink message to be sent.
280 *
281 * Checks the netlink message \c nlh for completness and extends it
282 * as required before sending it out. Checked fields include pid,
283 * sequence nr, and flags.
284 *
285 * @see nl_send()
286 * @return Number of characters sent or a negative error code.
287 */
288int nl_send_auto_complete(struct nl_sock *sk, struct nl_msg *msg)
289{
290	struct nlmsghdr *nlh;
291	struct nl_cb *cb = sk->s_cb;
292
293	nlh = nlmsg_hdr(msg);
294	if (nlh->nlmsg_pid == 0)
295		nlh->nlmsg_pid = sk->s_local.nl_pid;
296
297	if (nlh->nlmsg_seq == 0)
298		nlh->nlmsg_seq = sk->s_seq_next++;
299
300	if (msg->nm_protocol == -1)
301		msg->nm_protocol = sk->s_proto;
302
303	nlh->nlmsg_flags |= NLM_F_REQUEST;
304
305	if (!(sk->s_flags & NL_NO_AUTO_ACK))
306		nlh->nlmsg_flags |= NLM_F_ACK;
307
308	if (cb->cb_send_ow)
309		return cb->cb_send_ow(sk, msg);
310	else
311		return nl_send(sk, msg);
312}
313
314/**
315 * Send simple netlink message using nl_send_auto_complete()
316 * @arg sk		Netlink socket.
317 * @arg type		Netlink message type.
318 * @arg flags		Netlink message flags.
319 * @arg buf		Data buffer.
320 * @arg size		Size of data buffer.
321 *
322 * Builds a netlink message with the specified type and flags and
323 * appends the specified data as payload to the message.
324 *
325 * @see nl_send_auto_complete()
326 * @return Number of characters sent on success or a negative error code.
327 */
328int nl_send_simple(struct nl_sock *sk, int type, int flags, void *buf,
329		   size_t size)
330{
331	int err;
332	struct nl_msg *msg;
333
334	msg = nlmsg_alloc_simple(type, flags);
335	if (!msg)
336		return -NLE_NOMEM;
337
338	if (buf && size) {
339		err = nlmsg_append(msg, buf, size, NLMSG_ALIGNTO);
340		if (err < 0)
341			goto errout;
342	}
343
344
345	err = nl_send_auto_complete(sk, msg);
346errout:
347	nlmsg_free(msg);
348
349	return err;
350}
351
352/** @} */
353
354/**
355 * @name Receive
356 * @{
357 */
358
359/**
360 * Receive data from netlink socket
361 * @arg sk		Netlink socket.
362 * @arg nla		Destination pointer for peer's netlink address.
363 * @arg buf		Destination pointer for message content.
364 * @arg creds		Destination pointer for credentials.
365 *
366 * Receives a netlink message, allocates a buffer in \c *buf and
367 * stores the message content. The peer's netlink address is stored
368 * in \c *nla. The caller is responsible for freeing the buffer allocated
369 * in \c *buf if a positive value is returned.  Interruped system calls
370 * are handled by repeating the read. The input buffer size is determined
371 * by peeking before the actual read is done.
372 *
373 * A non-blocking sockets causes the function to return immediately with
374 * a return value of 0 if no data is available.
375 *
376 * @return Number of octets read, 0 on EOF or a negative error code.
377 */
378int nl_recv(struct nl_sock *sk, struct sockaddr_nl *nla,
379	    unsigned char **buf, struct ucred **creds)
380{
381	int n;
382	int flags = 0;
383	static int page_size = 0;
384	struct iovec iov;
385	struct msghdr msg = {
386		.msg_name = (void *) nla,
387		.msg_namelen = sizeof(struct sockaddr_nl),
388		.msg_iov = &iov,
389		.msg_iovlen = 1,
390		.msg_control = NULL,
391		.msg_controllen = 0,
392		.msg_flags = 0,
393	};
394	struct cmsghdr *cmsg;
395
396	if (sk->s_flags & NL_MSG_PEEK)
397		flags |= MSG_PEEK;
398
399	if (page_size == 0)
400		page_size = getpagesize() * 4;
401
402	iov.iov_len = page_size;
403	iov.iov_base = *buf = malloc(iov.iov_len);
404
405	if (sk->s_flags & NL_SOCK_PASSCRED) {
406		msg.msg_controllen = CMSG_SPACE(sizeof(struct ucred));
407		msg.msg_control = calloc(1, msg.msg_controllen);
408	}
409retry:
410
411	n = recvmsg(sk->s_fd, &msg, flags);
412	if (!n)
413		goto abort;
414	else if (n < 0) {
415		if (errno == EINTR) {
416			NL_DBG(3, "recvmsg() returned EINTR, retrying\n");
417			goto retry;
418		} else if (errno == EAGAIN) {
419			NL_DBG(3, "recvmsg() returned EAGAIN, aborting\n");
420			goto abort;
421		} else {
422			free(msg.msg_control);
423			free(*buf);
424			return -nl_syserr2nlerr(errno);
425		}
426	}
427
428	if (iov.iov_len < n ||
429	    msg.msg_flags & MSG_TRUNC) {
430		/* Provided buffer is not long enough, enlarge it
431		 * and try again. */
432		iov.iov_len *= 2;
433		iov.iov_base = *buf = realloc(*buf, iov.iov_len);
434		goto retry;
435	} else if (msg.msg_flags & MSG_CTRUNC) {
436		msg.msg_controllen *= 2;
437		msg.msg_control = realloc(msg.msg_control, msg.msg_controllen);
438		goto retry;
439	} else if (flags != 0) {
440		/* Buffer is big enough, do the actual reading */
441		flags = 0;
442		goto retry;
443	}
444
445	if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
446		free(msg.msg_control);
447		free(*buf);
448		return -NLE_NOADDR;
449	}
450
451	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
452		if (cmsg->cmsg_level == SOL_SOCKET &&
453		    cmsg->cmsg_type == SCM_CREDENTIALS) {
454			*creds = calloc(1, sizeof(struct ucred));
455			memcpy(*creds, CMSG_DATA(cmsg), sizeof(struct ucred));
456			break;
457		}
458	}
459
460	free(msg.msg_control);
461	return n;
462
463abort:
464	free(msg.msg_control);
465	free(*buf);
466	return 0;
467}
468
/*
 * Call the callback of the given type and act on its return value.
 *
 * The macro can only be used inside a function which declares a variable
 * `err' and provides the labels `skip', `stop' and `out'. The return value
 * of nl_cb_call() is stored in `err' and handled as follows:
 *   NL_OK:     `err' is reset to 0 and execution continues after the macro
 *   NL_SKIP:   jump to `skip'
 *   NL_STOP:   jump to `stop'
 *   otherwise: jump to `out' with `err' holding the return value
 */
#define NL_CB_CALL(cb, type, msg) \
do { \
	err = nl_cb_call(cb, type, msg); \
	switch (err) { \
	case NL_OK: \
		err = 0; \
		break; \
	case NL_SKIP: \
		goto skip; \
	case NL_STOP: \
		goto stop; \
	default: \
		goto out; \
	} \
} while (0)
484
/*
 * Default implementation behind nl_recvmsgs().
 *
 * Reads data from the socket using the receive function overwritten in
 * `cb' (cb_recv_ow) or nl_recv() otherwise, walks over all netlink
 * messages found in the data read and calls the callbacks set in `cb'
 * for each of them. Reading is repeated as long as the last chunk of
 * data contained a message carrying NLM_F_MULTI which was not followed
 * by a NLMSG_DONE message.
 *
 * Returns the return value of the receive function if it is zero or
 * negative, 0 if processing finished or a callback returned NL_STOP, a
 * negative error code on failure, or the return value of a callback
 * which returned something other than NL_OK, NL_SKIP or NL_STOP (see
 * NL_CB_CALL).
 */
static int recvmsgs(struct nl_sock *sk, struct nl_cb *cb)
{
	int n, err = 0, multipart = 0;
	unsigned char *buf = NULL;
	struct nlmsghdr *hdr;
	struct sockaddr_nl nla = {0};
	struct nl_msg *msg = NULL;
	struct ucred *creds = NULL;

continue_reading:
	NL_DBG(3, "Attempting to read from %p\n", sk);
	if (cb->cb_recv_ow)
		n = cb->cb_recv_ow(sk, &nla, &buf, &creds);
	else
		n = nl_recv(sk, &nla, &buf, &creds);

	/* Nothing was read or reading failed. buf is not freed here,
	 * nl_recv() releases its buffer itself in that case. */
	if (n <= 0)
		return n;

	NL_DBG(3, "recvmsgs(%p): Read %d bytes\n", sk, n);

	hdr = (struct nlmsghdr *) buf;
	while (nlmsg_ok(hdr, n)) {
		NL_DBG(3, "recgmsgs(%p): Processing valid message...\n", sk);

		/* Release the message of the previous iteration, msg is
		 * NULL on the first iteration. */
		nlmsg_free(msg);
		msg = nlmsg_convert(hdr);
		if (!msg) {
			err = -NLE_NOMEM;
			goto out;
		}

		nlmsg_set_proto(msg, sk->s_proto);
		nlmsg_set_src(msg, &nla);
		if (creds)
			nlmsg_set_creds(msg, creds);

		/* Raw callback is the first, it gives the most control
		 * to the user and he can do his very own parsing. */
		if (cb->cb_set[NL_CB_MSG_IN])
			NL_CB_CALL(cb, NL_CB_MSG_IN, msg);

		/* Sequence number checking. The check may be done by
		 * the user, otherwise a very simple check is applied
		 * enforcing strict ordering */
		if (cb->cb_set[NL_CB_SEQ_CHECK])
			NL_CB_CALL(cb, NL_CB_SEQ_CHECK, msg);
		else if (hdr->nlmsg_seq != sk->s_seq_expect) {
			if (cb->cb_set[NL_CB_INVALID])
				NL_CB_CALL(cb, NL_CB_INVALID, msg);
			else {
				err = -NLE_SEQ_MISMATCH;
				goto out;
			}
		}

		if (hdr->nlmsg_type == NLMSG_DONE ||
		    hdr->nlmsg_type == NLMSG_ERROR ||
		    hdr->nlmsg_type == NLMSG_NOOP ||
		    hdr->nlmsg_type == NLMSG_OVERRUN) {
			/* We can't check for !NLM_F_MULTI since some netlink
			 * users in the kernel are broken. */
			sk->s_seq_expect++;
			NL_DBG(3, "recvmsgs(%p): Increased expected " \
			       "sequence number to %d\n",
			       sk, sk->s_seq_expect);
		}

		/* Remember that more parts are expected, this makes us
		 * continue reading once this chunk has been processed. */
		if (hdr->nlmsg_flags & NLM_F_MULTI)
			multipart = 1;

		/* Other side wishes to see an ack for this message */
		if (hdr->nlmsg_flags & NLM_F_ACK) {
			if (cb->cb_set[NL_CB_SEND_ACK])
				NL_CB_CALL(cb, NL_CB_SEND_ACK, msg);
			else {
				/* FIXME: implement */
			}
		}

		/* Message terminates a multipart message, this is
		 * usually the end of a message and therefore we slip
		 * out of the loop by default. The user may overrule
		 * this action by skipping this packet. */
		if (hdr->nlmsg_type == NLMSG_DONE) {
			multipart = 0;
			if (cb->cb_set[NL_CB_FINISH])
				NL_CB_CALL(cb, NL_CB_FINISH, msg);
		}

		/* Message to be ignored, the default action is to
		 * skip this message if no callback is specified. The
		 * user may overrule this action by returning
		 * NL_PROCEED. */
		else if (hdr->nlmsg_type == NLMSG_NOOP) {
			if (cb->cb_set[NL_CB_SKIPPED])
				NL_CB_CALL(cb, NL_CB_SKIPPED, msg);
			else
				goto skip;
		}

		/* Data got lost, report back to user. The default action is to
		 * quit parsing. The user may overrule this action by returning
		 * NL_SKIP or NL_PROCEED (dangerous) */
		else if (hdr->nlmsg_type == NLMSG_OVERRUN) {
			if (cb->cb_set[NL_CB_OVERRUN])
				NL_CB_CALL(cb, NL_CB_OVERRUN, msg);
			else {
				err = -NLE_MSG_OVERFLOW;
				goto out;
			}
		}

		/* Message carries a nlmsgerr */
		else if (hdr->nlmsg_type == NLMSG_ERROR) {
			struct nlmsgerr *e = nlmsg_data(hdr);

			if (hdr->nlmsg_len < nlmsg_msg_size(sizeof(*e))) {
				/* Truncated error message, the default action
				 * is to stop parsing. The user may overrule
				 * this action by returning NL_SKIP or
				 * NL_PROCEED (dangerous) */
				if (cb->cb_set[NL_CB_INVALID])
					NL_CB_CALL(cb, NL_CB_INVALID, msg);
				else {
					err = -NLE_MSG_TRUNC;
					goto out;
				}
			} else if (e->error) {
				/* Error message reported back from kernel. */
				if (cb->cb_err) {
					err = cb->cb_err(&nla, e,
							   cb->cb_err_arg);
					if (err < 0)
						goto out;
					else if (err == NL_SKIP)
						goto skip;
					else if (err == NL_STOP) {
						/* The error handler asks to
						 * stop, report the error of
						 * the message to the caller. */
						err = -nl_syserr2nlerr(e->error);
						goto out;
					}
				} else {
					err = -nl_syserr2nlerr(e->error);
					goto out;
				}
			} else if (cb->cb_set[NL_CB_ACK])
				/* An error code of 0 is an ACK. */
				NL_CB_CALL(cb, NL_CB_ACK, msg);
		} else {
			/* Valid message (not checking for MULTIPART bit to
			 * get along with broken kernels). NL_SKIP has no
			 * effect on this. */
			if (cb->cb_set[NL_CB_VALID])
				NL_CB_CALL(cb, NL_CB_VALID, msg);
		}
skip:
		err = 0;
		hdr = nlmsg_next(hdr, &n);
	}

	/* All messages of this chunk are processed. Release everything and
	 * reset the pointers so that neither the next read nor the cleanup
	 * below operates on freed memory. */
	nlmsg_free(msg);
	free(buf);
	free(creds);
	buf = NULL;
	msg = NULL;
	creds = NULL;

	if (multipart) {
		/* Multipart message not yet complete, continue reading */
		goto continue_reading;
	}
stop:
	/* Regular end of processing or a callback returned NL_STOP. */
	err = 0;
out:
	nlmsg_free(msg);
	free(buf);
	free(creds);

	return err;
}
664
665/**
666 * Receive a set of messages from a netlink socket.
667 * @arg sk		Netlink socket.
668 * @arg cb		set of callbacks to control behaviour.
669 *
670 * Repeatedly calls nl_recv() or the respective replacement if provided
671 * by the application (see nl_cb_overwrite_recv()) and parses the
672 * received data as netlink messages. Stops reading if one of the
673 * callbacks returns NL_STOP or nl_recv returns either 0 or a negative error code.
674 *
675 * A non-blocking sockets causes the function to return immediately if
676 * no data is available.
677 *
678 * @return 0 on success or a negative error code from nl_recv().
679 */
680int nl_recvmsgs(struct nl_sock *sk, struct nl_cb *cb)
681{
682	if (cb->cb_recvmsgs_ow)
683		return cb->cb_recvmsgs_ow(sk, cb);
684	else
685		return recvmsgs(sk, cb);
686}
687
688
689static int ack_wait_handler(struct nl_msg *msg, void *arg)
690{
691	return NL_STOP;
692}
693
694/**
695 * Wait for ACK.
696 * @arg sk		Netlink socket.
697 * @pre The netlink socket must be in blocking state.
698 *
699 * Waits until an ACK is received for the latest not yet acknowledged
700 * netlink message.
701 */
702int nl_wait_for_ack(struct nl_sock *sk)
703{
704	int err;
705	struct nl_cb *cb;
706
707	cb = nl_cb_clone(sk->s_cb);
708	if (cb == NULL)
709		return -NLE_NOMEM;
710
711	nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_wait_handler, NULL);
712	err = nl_recvmsgs(sk, cb);
713	nl_cb_put(cb);
714
715	return err;
716}
717
718/** @} */
719
720/** @} */
721