1/*
2 * Copyright (c) 2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1.  Redistributions of source code must retain the above copyright
11 *     notice, this list of conditions and the following disclaimer.
12 * 2.  Redistributions in binary form must reproduce the above copyright
13 *     notice, this list of conditions and the following disclaimer in the
14 *     documentation and/or other materials provided with the distribution.
15 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of its
16 *     contributors may be used to endorse or promote products derived from
17 *     this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * Portions of this software have been released under the following terms:
31 *
32 * (c) Copyright 1989-1993 OPEN SOFTWARE FOUNDATION, INC.
33 * (c) Copyright 1989-1993 HEWLETT-PACKARD COMPANY
34 * (c) Copyright 1989-1993 DIGITAL EQUIPMENT CORPORATION
35 *
36 * To anyone who acknowledges that this file is provided "AS IS"
37 * without any express or implied warranty:
38 * permission to use, copy, modify, and distribute this file for any
39 * purpose is hereby granted without fee, provided that the above
40 * copyright notices and this notice appears in all source code copies,
41 * and that none of the names of Open Software Foundation, Inc., Hewlett-
42 * Packard Company or Digital Equipment Corporation be used
43 * in advertising or publicity pertaining to distribution of the software
44 * without specific, written prior permission.  Neither Open Software
45 * Foundation, Inc., Hewlett-Packard Company nor Digital
46 * Equipment Corporation makes any representations about the suitability
47 * of this software for any purpose.
48 *
49 * Copyright (c) 2007, Novell, Inc. All rights reserved.
50 * Redistribution and use in source and binary forms, with or without
51 * modification, are permitted provided that the following conditions
52 * are met:
53 *
54 * 1.  Redistributions of source code must retain the above copyright
55 *     notice, this list of conditions and the following disclaimer.
56 * 2.  Redistributions in binary form must reproduce the above copyright
57 *     notice, this list of conditions and the following disclaimer in the
58 *     documentation and/or other materials provided with the distribution.
59 * 3.  Neither the name of Novell Inc. nor the names of its contributors
60 *     may be used to endorse or promote products derived from this
61 *     this software without specific prior written permission.
62 *
63 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
64 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
65 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
66 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY
67 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
68 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
69 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
70 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
71 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
72 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
73 *
74 * @APPLE_LICENSE_HEADER_END@
75 */
76
77/*
78**
79**  NAME:
80**
81**      rpcdsliv.c
82**
83**  FACILITY:
84**
85**      RPC Daemon
86**
87**  ABSTRACT:
88**
89**      RPCD  Server Liveness Module.  Tasks to periodically ping servers
90**      which are registered in the endpoint database and mark them for
91**      deletion from the database if they do not respond.  One task
92**      also purges entries which are marked as deleted and which have
93**      no read references to them.
94**
95**
96**
97*/
98
99#include <commonp.h>
100#include <com.h>
101
102#include <dce/ep.h>     /* derived from ep.idl */
103#include <dsm.h>        /* derived from dsm.idl */
104
105#include <rpcdp.h>
106#include <rpcddb.h>
107#include <rpcdepdbp.h>
108#include <rpcdutil.h>
109
/*
 *  Wait intervals, in seconds, used by the liveness tasks.
 *  The long wait is the normal period between passes; sliv_task2
 *  switches to the short wait while some server failed its last ping.
 */
#define slive_c_long_wait   (15 * 60)   /* 15 minutes */
#define slive_c_short_wait  (1 * 60)    /* 1 minute */

/*
 *  RPC comm timeout used when pinging a "good" server
 *  (i.e. one with no recorded communication failures).
 */
#define slive_c_short_comm_timeout          3

/*
 *  Number of consecutive failed pings after which a server is
 *  deemed dead and its entry is marked for deletion.
 */
#define slive_c_max_server_not_listening    20
122
123INTERNAL void sliv_task1
124    (
125        void    *arg
126    );
127
128INTERNAL void sliv_task2
129    (
130        void    *arg
131    );
132
133INTERNAL boolean32 ping_server
134    (
135        db_entry_t      *entp,
136        unsigned32      timeout,
137        error_status_t  *status
138    );
139
140
141/*  Start server alive tasks and init condition variable
142 *  used by task1 to tell task2 that it should aggressively
143 *  ping a server
144 *
145 *  NB: this routine must be called after the db locks have been
146 *  inited
147 */
148PRIVATE void sliv_init(h, status)
149struct db       *h;
150error_status_t  *status;
151{
152    dcethread_cond_init_throw(&h->sliv_task2_cv, NULL);
153
154    dcethread_create_throw(&h->sliv_task1_h, NULL,
155            (void*) sliv_task1, (void *) h);
156
157    dcethread_create_throw(&h->sliv_task2_h, NULL,
158            (void*) sliv_task2, (void *) h);
159
160    *status = error_status_ok;
161}
162
163/*  Task1 runs a few times an hour
164 *  It purges entries which are marked as deleted and
165 *      have no read references to them.
166 *  It also pings servers which have been reachable.
167 *  If a server becomes not reachable, its destiny is passed
168 *  off to Task2 which pings it more frequently and will
169 *  mark it for deletion if it isn't reachable after
170 *  slive_c_max_server_not_listening consecutive tries.
171 */
172
173INTERNAL void sliv_task1(arg)
174void    *arg;
175{
176#define slive_c_max_deletes 5
177
178    struct db       *h;
179    struct timeval  now;
180    struct timezone tz;
181    unsigned32      ndeletes;
182    db_lists_t      *lp,
183                    *lp_next;
184    db_entry_t      *entp;
185    boolean32       server_listening;
186    error_status_t  status;
187
188    h = (struct db *) arg;
189
190    gettimeofday(&now, &tz);
191
192    while (true)
193    {
194        ru_sleep_until(&now, slive_c_long_wait);
195
196        gettimeofday(&now, &tz);
197
198        db_lock(h);
199
200        ndeletes = 0;
201        for (lp = db_list_first(&h->lists_mgmt, db_c_entry_list, NULL);
202                lp != NULL; lp = lp_next)
203        {
204            /*
205             *  Point to next entry in list now because
206             *  may delete this entry and remove it from
207             *  list
208             */
209            lp_next = db_list_next(db_c_entry_list, lp);
210
211            entp = (db_entry_t *) lp;
212
213            /*  If have done lots of deletes
214             *  unlock db for a while so more
215             *  important things can happen
216             */
217            if (ndeletes > slive_c_max_deletes)
218            {
219                ndeletes = 0;
220                entp->read_nrefs++;
221                db_unlock(h);
222
223                ru_sleep(60);
224
225                db_lock(h);
226                entp->read_nrefs--;
227            }
228
229            if (entp->delete_flag)
230            {
231                if (entp->read_nrefs == 0)
232                {
233                    epdb_delete_entry(h, entp, &status);
234                    ndeletes++;
235
236                    if (dflag)
237                        printf("sliv_task1 deleting server\n");
238                }
239            }
240            else
241            if (entp->ncomm_fails == 0)
242            {
243                entp->read_nrefs++;
244                db_unlock(h);
245
246                dcethread_checkinterrupt();
247
248                server_listening = ping_server(entp, slive_c_short_comm_timeout, &status);
249
250                db_lock(h);
251                entp->read_nrefs--;
252
253                if (!server_listening)
254                {
255                    entp->ncomm_fails++;
256                    dcethread_cond_signal_throw(&h->sliv_task2_cv);
257                }
258            }
259
260        }   /* end entry list loop */
261
262        db_unlock(h);
263    }
264}
265
266INTERNAL void sliv_task2(arg)
267void    *arg;
268{
269    struct db       *h;
270    struct timeval  now;
271    struct timezone tz;
272    struct timespec waketime;
273    volatile unsigned32      waitsecs;
274    volatile boolean32       have_db_lock;
275    db_lists_t      *lp;
276    db_entry_t      *entp;
277    boolean32       server_listening;
278    error_status_t  status;
279
280    //DO_NOT_CLOBBER(waitsecs);
281    //DO_NOT_CLOBBER(have_db_lock);
282
283    h = (struct db *) arg;
284
285    /*  let other init stuff get done */
286    ru_sleep(180);
287
288    gettimeofday(&now, &tz);
289    waitsecs = slive_c_long_wait;
290
291    db_lock(h);
292    have_db_lock = true;
293
294    DCETHREAD_TRY
295    {
296        while (true)
297        {
298			  int __istat;
299            waketime.tv_sec = now.tv_sec + waitsecs + 1;
300            waketime.tv_nsec = 0;
301
302            /*  release lock and wait for task2 event or timeout or cancel
303             */
304
305				do	{
306                                    __istat = dcethread_cond_timedwait_throw(&h->sliv_task2_cv, &h->lock, &waketime);
307				} while(__istat == EINTR);
308
309            /*  have lock now
310             */
311
312            gettimeofday(&now, &tz);
313            waitsecs = slive_c_long_wait;   /* so far no bad servers */
314
315            for (lp = db_list_first(&h->lists_mgmt, db_c_entry_list, NULL);
316                    lp != NULL; lp = db_list_next(db_c_entry_list, lp))
317            {
318                entp = (db_entry_t *) lp;
319
320                if ((entp->ncomm_fails > 0) && (!entp->delete_flag))
321                {
322                    entp->read_nrefs++;
323                    have_db_lock = false;
324                    db_unlock(h);
325
326                    dcethread_checkinterrupt();
327
328                    server_listening = ping_server(entp, rpc_c_binding_default_timeout, &status);
329
330                    db_lock(h);
331                    have_db_lock = true;
332                    entp->read_nrefs--;
333
334                    if (!server_listening)
335                    {
336                        waitsecs = slive_c_short_wait;
337                        entp->ncomm_fails++;
338                        if (entp->ncomm_fails >= slive_c_max_server_not_listening)
339                        {
340                            /*  Haven't communicated with server for
341                             *  slive_c_max_server_not_listening consecutive tries
342                             *  so mark entry as deleted in memory and on disk.
343                             *  Task1 will actually purge entry from database
344                             *  (it needs to do some extra bookkeeping so let's
345                             *  keep that in one place)
346                             */
347                            entp->delete_flag = true;
348                            db_update_entry(h, entp, &status);
349                            if (dflag)
350                                printf("sliv_task2 marking server for deletion\n");
351                        }
352                    }
353                    else
354                    {
355                        entp->ncomm_fails = 0;
356                    }
357                }
358            } /* end entry list loop */
359        } /* end while loop */
360
361    }   /* DCETHREAD_TRY */
362    DCETHREAD_CATCH_ALL(THIS_CATCH)
363    {
364        /*  received cancel or some other exception.
365         *  just unlock database and exit task
366         */
367        if (have_db_lock)
368            db_unlock(h);
369        DCETHREAD_RERAISE;
370    }
371    DCETHREAD_ENDTRY
372}
373
374INTERNAL boolean32 ping_server(entp, timeout, status)
375db_entry_t      *entp;
376unsigned32      timeout;
377error_status_t  *status;
378{
379    rpc_binding_handle_t    binding_h;
380    boolean32               is_listening;
381    error_status_t          tmp_st;
382
383    rpc_tower_to_binding(entp->tower.tower_octet_string, &binding_h, status);
384    if (! STATUS_OK(status)) return(true);      /* what to do? */
385
386    if (timeout != rpc_c_binding_default_timeout)
387    {
388        rpc_mgmt_set_com_timeout(binding_h, timeout, status);
389    }
390
391    is_listening = rpc_mgmt_is_server_listening(binding_h, status);
392
393    rpc_binding_free(&binding_h, &tmp_st);
394
395    return(is_listening);
396}
397