1/*
2 * Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package sun.nio.ch;
27
28import java.io.IOException;
29import java.security.AccessController;
30import java.util.BitSet;
31import java.util.HashMap;
32import java.util.Map;
33import sun.security.action.GetIntegerAction;
34
35/**
36 * Manipulates a native array of epoll_event structs on Linux:
37 *
38 * typedef union epoll_data {
39 *     void *ptr;
40 *     int fd;
41 *     __uint32_t u32;
42 *     __uint64_t u64;
43 *  } epoll_data_t;
44 *
45 * struct epoll_event {
46 *     __uint32_t events;
47 *     epoll_data_t data;
48 * };
49 *
50 * The system call to wait for I/O events is epoll_wait(2). It populates an
51 * array of epoll_event structures that are passed to the call. The data
52 * member of the epoll_event structure contains the same data as was set
53 * when the file descriptor was registered to epoll via epoll_ctl(2). In
54 * this implementation we set data.fd to be the file descriptor that we
55 * register. That way, we have the file descriptor available when we
56 * process the events.
57 */
58
59class EPollArrayWrapper {
60    // EPOLL_EVENTS
61    private static final int EPOLLIN      = 0x001;
62
63    // opcodes
64    private static final int EPOLL_CTL_ADD      = 1;
65    private static final int EPOLL_CTL_DEL      = 2;
66    private static final int EPOLL_CTL_MOD      = 3;
67
68    // Miscellaneous constants
69    private static final int SIZE_EPOLLEVENT  = sizeofEPollEvent();
70    private static final int EVENT_OFFSET     = 0;
71    private static final int DATA_OFFSET      = offsetofData();
72    private static final int FD_OFFSET        = DATA_OFFSET;
73    private static final int OPEN_MAX         = IOUtil.fdLimit();
74    private static final int NUM_EPOLLEVENTS  = Math.min(OPEN_MAX, 8192);
75
76    // Special value to indicate that an update should be ignored
77    private static final byte  KILLED = (byte)-1;
78
79    // Initial size of arrays for fd registration changes
80    private static final int INITIAL_PENDING_UPDATE_SIZE = 64;
81
82    // maximum size of updatesLow
83    private static final int MAX_UPDATE_ARRAY_SIZE = AccessController.doPrivileged(
84        new GetIntegerAction("sun.nio.ch.maxUpdateArraySize", Math.min(OPEN_MAX, 64*1024)));
85
86    // The fd of the epoll driver
87    private final int epfd;
88
89     // The epoll_event array for results from epoll_wait
90    private final AllocatedNativeObject pollArray;
91
92    // Base address of the epoll_event array
93    private final long pollArrayAddress;
94
95    // The fd of the interrupt line going out
96    private int outgoingInterruptFD;
97
98    // The fd of the interrupt line coming in
99    private int incomingInterruptFD;
100
101    // The index of the interrupt FD
102    private int interruptedIndex;
103
104    // Number of updated pollfd entries
105    int updated;
106
107    // object to synchronize fd registration changes
108    private final Object updateLock = new Object();
109
110    // number of file descriptors with registration changes pending
111    private int updateCount;
112
113    // file descriptors with registration changes pending
114    private int[] updateDescriptors = new int[INITIAL_PENDING_UPDATE_SIZE];
115
116    // events for file descriptors with registration changes pending, indexed
117    // by file descriptor and stored as bytes for efficiency reasons. For
118    // file descriptors higher than MAX_UPDATE_ARRAY_SIZE (unlimited case at
119    // least) then the update is stored in a map.
120    private final byte[] eventsLow = new byte[MAX_UPDATE_ARRAY_SIZE];
121    private final Map<Integer,Byte> eventsHigh = new HashMap<>();
122
123    // Used by release and updateRegistrations to track whether a file
124    // descriptor is registered with epoll.
125    private final BitSet registered = new BitSet();
126
127
128    EPollArrayWrapper() throws IOException {
129        // creates the epoll file descriptor
130        epfd = epollCreate();
131
132        // the epoll_event array passed to epoll_wait
133        int allocationSize = NUM_EPOLLEVENTS * SIZE_EPOLLEVENT;
134        pollArray = new AllocatedNativeObject(allocationSize, true);
135        pollArrayAddress = pollArray.address();
136    }
137
138    void initInterrupt(int fd0, int fd1) {
139        outgoingInterruptFD = fd1;
140        incomingInterruptFD = fd0;
141        epollCtl(epfd, EPOLL_CTL_ADD, fd0, EPOLLIN);
142    }
143
144    void putEventOps(int i, int event) {
145        int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
146        pollArray.putInt(offset, event);
147    }
148
149    void putDescriptor(int i, int fd) {
150        int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
151        pollArray.putInt(offset, fd);
152    }
153
154    int getEventOps(int i) {
155        int offset = SIZE_EPOLLEVENT * i + EVENT_OFFSET;
156        return pollArray.getInt(offset);
157    }
158
159    int getDescriptor(int i) {
160        int offset = SIZE_EPOLLEVENT * i + FD_OFFSET;
161        return pollArray.getInt(offset);
162    }
163
164    /**
165     * Returns {@code true} if updates for the given key (file
166     * descriptor) are killed.
167     */
168    private boolean isEventsHighKilled(Integer key) {
169        assert key >= MAX_UPDATE_ARRAY_SIZE;
170        Byte value = eventsHigh.get(key);
171        return (value != null && value == KILLED);
172    }
173
174    /**
175     * Sets the pending update events for the given file descriptor. This
176     * method has no effect if the update events is already set to KILLED,
177     * unless {@code force} is {@code true}.
178     */
179    private void setUpdateEvents(int fd, byte events, boolean force) {
180        if (fd < MAX_UPDATE_ARRAY_SIZE) {
181            if ((eventsLow[fd] != KILLED) || force) {
182                eventsLow[fd] = events;
183            }
184        } else {
185            Integer key = Integer.valueOf(fd);
186            if (!isEventsHighKilled(key) || force) {
187                eventsHigh.put(key, Byte.valueOf(events));
188            }
189        }
190    }
191
192    /**
193     * Returns the pending update events for the given file descriptor.
194     */
195    private byte getUpdateEvents(int fd) {
196        if (fd < MAX_UPDATE_ARRAY_SIZE) {
197            return eventsLow[fd];
198        } else {
199            Byte result = eventsHigh.get(Integer.valueOf(fd));
200            // result should never be null
201            return result.byteValue();
202        }
203    }
204
205    /**
206     * Update the events for a given file descriptor
207     */
208    void setInterest(int fd, int mask) {
209        synchronized (updateLock) {
210            // record the file descriptor and events
211            int oldCapacity = updateDescriptors.length;
212            if (updateCount == oldCapacity) {
213                int newCapacity = oldCapacity + INITIAL_PENDING_UPDATE_SIZE;
214                int[] newDescriptors = new int[newCapacity];
215                System.arraycopy(updateDescriptors, 0, newDescriptors, 0, oldCapacity);
216                updateDescriptors = newDescriptors;
217            }
218            updateDescriptors[updateCount++] = fd;
219
220            // events are stored as bytes for efficiency reasons
221            byte b = (byte)mask;
222            assert (b == mask) && (b != KILLED);
223            setUpdateEvents(fd, b, false);
224        }
225    }
226
227    /**
228     * Add a file descriptor
229     */
230    void add(int fd) {
231        // force the initial update events to 0 as it may be KILLED by a
232        // previous registration.
233        synchronized (updateLock) {
234            assert !registered.get(fd);
235            setUpdateEvents(fd, (byte)0, true);
236        }
237    }
238
239    /**
240     * Remove a file descriptor
241     */
242    void remove(int fd) {
243        synchronized (updateLock) {
244            // kill pending and future update for this file descriptor
245            setUpdateEvents(fd, KILLED, false);
246
247            // remove from epoll
248            if (registered.get(fd)) {
249                epollCtl(epfd, EPOLL_CTL_DEL, fd, 0);
250                registered.clear(fd);
251            }
252        }
253    }
254
255    /**
256     * Close epoll file descriptor and free poll array
257     */
258    void closeEPollFD() throws IOException {
259        FileDispatcherImpl.closeIntFD(epfd);
260        pollArray.free();
261    }
262
263    int poll(long timeout) throws IOException {
264        updateRegistrations();
265        updated = epollWait(pollArrayAddress, NUM_EPOLLEVENTS, timeout, epfd);
266        for (int i=0; i<updated; i++) {
267            if (getDescriptor(i) == incomingInterruptFD) {
268                interruptedIndex = i;
269                interrupted = true;
270                break;
271            }
272        }
273        return updated;
274    }
275
276    /**
277     * Update the pending registrations.
278     */
279    private void updateRegistrations() {
280        synchronized (updateLock) {
281            int j = 0;
282            while (j < updateCount) {
283                int fd = updateDescriptors[j];
284                short events = getUpdateEvents(fd);
285                boolean isRegistered = registered.get(fd);
286                int opcode = 0;
287
288                if (events != KILLED) {
289                    if (isRegistered) {
290                        opcode = (events != 0) ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
291                    } else {
292                        opcode = (events != 0) ? EPOLL_CTL_ADD : 0;
293                    }
294                    if (opcode != 0) {
295                        epollCtl(epfd, opcode, fd, events);
296                        if (opcode == EPOLL_CTL_ADD) {
297                            registered.set(fd);
298                        } else if (opcode == EPOLL_CTL_DEL) {
299                            registered.clear(fd);
300                        }
301                    }
302                }
303                j++;
304            }
305            updateCount = 0;
306        }
307    }
308
309    // interrupt support
310    private boolean interrupted = false;
311
312    public void interrupt() {
313        interrupt(outgoingInterruptFD);
314    }
315
316    public int interruptedIndex() {
317        return interruptedIndex;
318    }
319
320    boolean interrupted() {
321        return interrupted;
322    }
323
324    void clearInterrupted() {
325        interrupted = false;
326    }
327
328    static {
329        IOUtil.load();
330        init();
331    }
332
333    private native int epollCreate();
334    private native void epollCtl(int epfd, int opcode, int fd, int events);
335    private native int epollWait(long pollAddress, int numfds, long timeout,
336                                 int epfd) throws IOException;
337    private static native int sizeofEPollEvent();
338    private static native int offsetofData();
339    private static native void interrupt(int fd);
340    private static native void init();
341}
342