1/*
2 * Copyright (c) 2009, 2011, ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9
10#define _USE_XOPEN // for strdup()
11#include <stdlib.h>
12#include <stdio.h>
13#include <string.h>
14#include <barrelfish/barrelfish.h>
15#include <barrelfish/nameservice_client.h>
16#include <barrelfish/waitset.h>
17#include <barrelfish/debug.h>
18#include <nfs/nfs.h>
19#include <vfs/vfs.h>
20#include <vfs/vfs_path.h>
21
22// networking stuff
23#include <net_sockets/net_sockets.h>
24// #include <lwip/netif.h>
25// #include <lwip/dhcp.h>
26// #include <netif/etharp.h>
27// #include <lwip/init.h>
28// #include <lwip/tcp.h>
29// #include <netif/bfeth.h>
30// #include <lwip/ip_addr.h>
31
32#include "vfs_backends.h"
33
/// Define to enable asynchronous writes
//#define ASYNC_WRITES

/// Upper bound on the number of read requests that read() issues up front
#define MAX_NFS_READ_CHUNKS  40

//#define NONBLOCKING_NFS_READ   1
/// Largest number of bytes requested by a single nfs_read() call in this file
#define MAX_NFS_READ_BYTES   14000

/* SG,2017-10-04: lowered from 1330, now works with e1000_net_sockets_server */
/// Largest number of bytes sent by a single nfs_write() call in this file
#define MAX_NFS_WRITE_BYTES  1300
/* SG,2017-10-04: raised from 1, still works with e1000_net_sockets_server */
/// Upper bound on the number of write requests that write() issues up front
#define MAX_NFS_WRITE_CHUNKS 4
/// Stability argument passed to every nfs_write() call in this file
#define NFS_WRITE_STABILITY  UNSTABLE

/// If \a e is a failure, print it together with message \a m and abort()
#define assert_err(e,m)     \
do {                        \
    if (err_is_fail(e)) {   \
        DEBUG_ERR(e,m);     \
        abort();            \
    }                       \
} while (0)
55
56#include "vfs_nfs.h"
57
58// condition used to singal controlling code to wait for a condition
59static bool wait_flag;
60
61static void wait_for_condition(void)
62{
63    while (!wait_flag) {
64        errval_t err = event_dispatch(get_default_waitset());
65        assert(err_is_ok(err));
66    }
67    wait_flag = false;
68}
69
70static void signal_condition(void)
71{
72    wait_flag = true;
73}
74
/**
 * \brief Continuation called when a path resolution started by
 *        initiate_resolve() ends
 *
 * \param st    Opaque pointer that was handed to initiate_resolve()
 * \param err   Outcome of the resolution
 * \param fh    File handle passed along by the resolver
 * \param fattr Attributes of the resolved object; may be NULL
 */
typedef void resolve_cont_fn(void *st, errval_t err, struct nfs_fh3 fh,
                             struct fattr3 *fattr /* optional */);
77
// state for an ongoing multi-component name resolution op
struct nfs_resolve_state {
    // mount whose client is used for the lookups
    struct nfs_state *nfs;
    // path being resolved; only the pointer is stored, the string is not copied
    const char *path;
    // index into path from which the next component is searched
    int path_pos;
    // set once the component handed to nfs_lookup() is the final one
    bool islast;
    // handle the current lookup is relative to
    struct nfs_fh3 curfh;
    // continuation invoked when the resolution ends
    resolve_cont_fn *cont;
    // opaque argument passed through to cont
    void *cont_st;
};
88
89static void resolve_lookup_cb(void *arg, struct nfs_client *client,
90                              LOOKUP3res *result)
91{
92    LOOKUP3resok *resok = &result->LOOKUP3res_u.resok;
93    struct nfs_resolve_state *st = arg;
94
95    if (result == NULL || result->status != NFS3_OK
96        || !resok->obj_attributes.attributes_follow) { // failed
97
98        st->cont(st->cont_st, FS_ERR_NOTFOUND, NULL_NFS_FH, NULL);
99out:
100        free(st);
101        xdr_LOOKUP3res(&xdr_free, result);
102        return;
103    }
104
105    // was this the last lookup?
106    if (st->islast) {
107        st->cont(st->cont_st, SYS_ERR_OK, resok->object,
108                 &resok->obj_attributes.post_op_attr_u.attributes);
109        goto out;
110    } else if (resok->obj_attributes.post_op_attr_u.attributes.type != NF3DIR) {
111        // must be a directory to recurse
112        st->cont(st->cont_st, FS_ERR_NOTFOUND, resok->object, NULL);
113        goto out;
114    }
115
116    // recurse...
117    st->curfh = resok->object;
118
119    // copy next chunk of path to private buffer
120    while (st->path[st->path_pos] == VFS_PATH_SEP) {
121        st->path_pos++;
122    }
123
124    char *nextsep = strchr(&st->path[st->path_pos], VFS_PATH_SEP);
125    size_t nextlen;
126    if (nextsep == NULL) {
127        st->islast = true;
128        nextlen = strlen(&st->path[st->path_pos]);
129    } else {
130        nextlen = nextsep - &st->path[st->path_pos];
131    }
132    char pathbuf[nextlen + 1];
133    memcpy(pathbuf, &st->path[st->path_pos], nextlen);
134    pathbuf[nextlen] = '\0';
135    st->path_pos += nextlen + 1;
136
137    errval_t e = nfs_lookup(st->nfs->client, st->curfh, pathbuf, resolve_lookup_cb,
138                         st);
139    assert(e == SYS_ERR_OK);
140
141    // free arguments
142    xdr_LOOKUP3res(&xdr_free, result);
143}
144
145static void initiate_resolve(struct nfs_state *nfs, const char *path,
146                             resolve_cont_fn *cont, void *cont_st)
147{
148    if (strlen(path) == (size_t)0) { // Resolving the root of the mount point
149        cont(cont_st, SYS_ERR_OK, nfs->rootfh, NULL);
150        return;
151    }
152
153    struct nfs_resolve_state *st = malloc(sizeof(struct nfs_resolve_state));
154    assert(st != NULL);
155
156    assert(path != NULL);
157
158    st->nfs = nfs;
159    st->path = path;
160    st->path_pos = 0;
161    st->curfh = nfs->rootfh;
162    st->cont = cont;
163    st->cont_st = cont_st;
164
165    // skip leading '/'s
166    while (st->path[st->path_pos] == VFS_PATH_SEP) {
167        st->path_pos++;
168    }
169
170    // locate first component of path
171    char *nextsep = strchr(&st->path[st->path_pos], VFS_PATH_SEP);
172    size_t nextlen;
173    if (nextsep == NULL) {
174        st->islast = true;
175        nextlen = strlen(&st->path[st->path_pos]);
176    } else {
177        st->islast = false;
178        nextlen = nextsep - &st->path[st->path_pos];
179    }
180
181    char pathbuf[nextlen + 1];
182    memcpy(pathbuf, &st->path[st->path_pos], nextlen);
183    pathbuf[nextlen] = '\0';
184    st->path_pos += nextlen + 1;
185
186    // initiate the first lookup
187    errval_t e = nfs_lookup(nfs->client, st->curfh, pathbuf, resolve_lookup_cb, st);
188    assert(e == SYS_ERR_OK);
189}
190
// bookkeeping shared by all chunks of one read() or write() call
struct nfs_file_io_handle {
    // caller's buffer: destination of a read, source of a write
    void        *data;
    // file position at which the transfer starts
    size_t      offset;
    // number of bytes to transfer; read_callback lowers it when it sees EOF
    size_t      size;
    // number of bytes the callbacks have accounted for so far
    size_t      size_complete;
    // NFS file handle the requests are issued on
    nfs_fh3     handle;
    // not referenced in the part of the file reviewed here
    int         chunk_count;
    // offset (relative to data/offset) of the first byte not yet requested
    size_t      chunk_pos;
    // counter maintained by read() and read_callback(); the reader is
    // signalled when it drops to zero
    int         chunks_in_progress;
    // NFS3_OK, or the status of a failed request as stored by a callback
    nfsstat3    status;
    // handle the write was issued on (set by write())
    struct nfs_handle *back_fh;
};
203
// per-request state: one is allocated for every chunk in flight
struct nfs_file_parallel_io_handle {
    // transfer this chunk belongs to
    struct nfs_file_io_handle    *fh;
    // offset of the chunk relative to the start of the transfer
    size_t                  chunk_start;
    // number of bytes requested for this chunk
    size_t                  chunk_size;
};
209
210static void read_callback(void *arg, struct nfs_client *client, READ3res *result)
211{
212    struct nfs_file_parallel_io_handle *pfh = arg;
213
214    assert(result != NULL);
215    // error
216    if (result->status != NFS3_OK) {
217        pfh->fh->status = result->status;
218        free(pfh);
219        signal_condition();
220        goto out;
221    }
222
223    READ3resok *res = &result->READ3res_u.resok;
224    assert(res->count == res->data.data_len);
225    assert(res->data.data_len <= pfh->chunk_size);
226
227    // copy the data
228    memcpy((char *)pfh->fh->data + pfh->chunk_start, res->data.data_val,
229           res->data.data_len);
230    pfh->fh->size_complete += res->data.data_len;
231
232    // is this the end of the file?
233    if (res->eof) {
234        // reduce the file size to match whatever we got and avoid useless calls
235        size_t newsize = pfh->chunk_start + res->data.data_len;
236        if (pfh->fh->size > newsize) {
237            pfh->fh->size = newsize;
238        }
239    }
240    // check whether the whole chunk was transmitted
241    else if (res->data.data_len < pfh->chunk_size) {
242        debug_printf("unexpected short read\n");
243        pfh->chunk_start += res->data.data_len;
244        pfh->chunk_size -= res->data.data_len;
245
246        errval_t e = nfs_read(client, pfh->fh->handle,
247                           pfh->fh->offset + pfh->chunk_start,
248                           pfh->chunk_size, read_callback, pfh);
249        assert(e == SYS_ERR_OK);
250
251        goto out;
252    }
253
254    assert(pfh->fh->size >= pfh->fh->size_complete);
255
256    // check whether all chunks have been transmitted
257    if (pfh->fh->size == pfh->fh->size_complete) {
258        free(pfh);
259    }
260    // else create a new request
261    else if (pfh->fh->chunk_pos < pfh->fh->size) {
262        pfh->chunk_start =  pfh->fh->chunk_pos;
263        pfh->chunk_size = MIN(MAX_NFS_READ_BYTES, pfh->fh->size - pfh->chunk_start);
264        pfh->fh->chunk_pos += pfh->chunk_size;
265        pfh->fh->chunks_in_progress++;
266        errval_t r = nfs_read(client, pfh->fh->handle,
267                           pfh->fh->offset + pfh->chunk_start,
268                           pfh->chunk_size, read_callback, pfh);
269        assert(r == SYS_ERR_OK);
270    } else {
271        free(pfh);
272    }
273
274out:
275    pfh->fh->chunks_in_progress--;
276
277    // allow the request thread to resume if we're the last chunk
278    if (pfh->fh->chunks_in_progress == 0) {
279        signal_condition();
280    }
281    // free arguments
282    xdr_READ3res(&xdr_free, result);
283}
284
285static void write_callback(void *arg, struct nfs_client *client, WRITE3res *result)
286{
287    struct nfs_file_parallel_io_handle *pfh = arg;
288
289    assert(result != NULL);
290
291    // error
292    if (result->status != NFS3_OK) {
293        pfh->fh->status = result->status;
294
295#ifdef ASYNC_WRITES
296        printf("write_callback: NFS error status %d\n", result->status);
297
298        pfh->fh->back_fh->inflight--;
299        assert(pfh->fh->back_fh->inflight >= 0);
300        free(pfh->fh);
301#endif
302
303        free(pfh);
304        signal_condition();
305        goto out;
306    }
307
308    WRITE3resok *res = &result->WRITE3res_u.resok;
309    assert(res->count == pfh->chunk_size);
310    pfh->fh->size_complete += res->count;
311
312    assert(pfh->fh->size >= pfh->fh->size_complete);
313
314    // check whether all chunks have been transmitted
315    if (pfh->fh->size == pfh->fh->size_complete) {
316#ifdef ASYNC_WRITES
317        pfh->fh->back_fh->inflight--;
318        assert(pfh->fh->back_fh->inflight >= 0);
319        free(pfh->fh);
320#endif
321
322        signal_condition();
323        free(pfh);
324    }
325    // else create a new request
326    else if (pfh->fh->chunk_pos < pfh->fh->size) {
327        pfh->chunk_start = pfh->fh->chunk_pos;
328        pfh->chunk_size = MIN(MAX_NFS_WRITE_BYTES, pfh->fh->size - pfh->chunk_start);
329        pfh->fh->chunk_pos += pfh->chunk_size;
330        errval_t r = nfs_write(client, pfh->fh->handle,
331                            pfh->fh->offset + pfh->chunk_start,
332                            (char *)pfh->fh->data + pfh->chunk_start, pfh->chunk_size,
333                            NFS_WRITE_STABILITY, write_callback, pfh);
334        assert(r == SYS_ERR_OK);
335    } else {
336        free(pfh);
337    }
338
339out:
340    // free arguments
341    xdr_WRITE3res(&xdr_free, result);
342}
343
344static void open_resolve_cont(void *st, errval_t err, struct nfs_fh3 fh,
345                              struct fattr3 *fattr)
346{
347    struct nfs_handle *h = st;
348
349    if (err_is_ok(err)) {
350        // make a copy of the file handle, because we are returning it to the user
351        nfs_copyfh(&h->fh, fh);
352        if (fattr != NULL) {
353            h->type = fattr->type;
354        }
355    }
356
357    signal_condition();
358}
359
360static errval_t open(void *st, const char *path, vfs_handle_t *rethandle)
361{
362    struct nfs_state *nfs = st;
363    struct nfs_handle *h = malloc(sizeof(struct nfs_handle));
364    assert(h != NULL);
365
366    h->isdir = false;
367    h->u.file.pos = 0;
368    h->nfs = nfs;
369    h->fh = NULL_NFS_FH;
370#ifdef ASYNC_WRITES
371    h->inflight = 0;
372#endif
373#ifdef WITH_META_DATA_CACHE
374    h->filesize_cached = false;
375    h->cached_filesize = 0;
376#endif
377
378    // lwip_mutex_lock();
379    initiate_resolve(nfs, path, open_resolve_cont, h);
380    wait_for_condition();
381    // lwip_mutex_unlock();
382
383    if (h->fh.data_len > 0 && h->type != NF3DIR) {
384        *rethandle = h;
385        return SYS_ERR_OK;
386    } else if (h->fh.data_len > 0 && h->type == NF3DIR) {
387        free(h);
388        return FS_ERR_NOTFILE;
389    } else {
390        free(h);
391        return FS_ERR_NOTFOUND;
392    }
393}
394
395static void create_callback(void *arg, struct nfs_client *client,
396                            CREATE3res *result)
397{
398    struct nfs_handle *h = arg;
399
400    assert(result != NULL);
401
402    // error
403    if (result->status == NFS3_OK) {
404        struct CREATE3resok *res = &result->CREATE3res_u.resok;
405        assert(res->obj.handle_follows);
406        // make a copy of the file handle, because we are returning it to the user
407        nfs_copyfh(&h->fh, res->obj.post_op_fh3_u.handle);
408        if (res->obj_attributes.attributes_follow) {
409            h->type = res->obj_attributes.post_op_attr_u.attributes.type;
410        }
411    } else { // XXX: Proper error handling
412        debug_printf("Error in create_callback %d\n", result->status);
413    }
414
415    signal_condition();
416}
417
418static void create_resolve_cont(void *st, errval_t err, struct nfs_fh3 fh,
419                               struct fattr3 *fattr)
420{
421    struct nfs_handle *h = st;
422
423    if (err_is_fail(err) || (fattr != NULL && fattr->type != NF3DIR)) {
424        DEBUG_ERR(err, "failure in create_resolve_cont");
425        // FIXME: failed to lookup directory. return meaningful error
426        signal_condition();
427        return;
428    }
429
430    static struct sattr3 nulattr;
431    nulattr.mode.set_it = true;
432    nulattr.mode.set_mode3_u.mode = 0666;
433    errval_t r = nfs_create(h->nfs->client, fh, h->st, false, nulattr,
434                         create_callback, h);
435    assert(r == SYS_ERR_OK);
436}
437
438static errval_t create(void *st, const char *path, vfs_handle_t *rethandle)
439{
440    struct nfs_state *nfs = st;
441
442    // find last path component
443    char *filename = strrchr(path, VFS_PATH_SEP);
444    if (filename == NULL) {
445        return FS_ERR_NOTFOUND;
446    }
447
448    // get directory name in separate nul-terminated string buffer
449    char *dir = malloc(filename - path + 1);
450    assert(dir != NULL);
451    memcpy(dir, path, filename - path);
452    dir[filename - path] = '\0';
453
454    // advance past separator
455    filename = filename + 1;
456
457    struct nfs_handle *h = malloc(sizeof(struct nfs_handle));
458    assert(h != NULL);
459
460    h->isdir = false;
461    h->u.file.pos = 0;
462    h->nfs = nfs;
463    h->fh = NULL_NFS_FH;
464    h->st = filename;
465    h->fh.data_len = 0;
466#ifdef ASYNC_WRITES
467    h->inflight = 0;
468#endif
469#ifdef WITH_META_DATA_CACHE
470    h->filesize_cached = false;
471    h->cached_filesize = 0;
472#endif
473
474    // lwip_mutex_lock();
475    initiate_resolve(nfs, dir, create_resolve_cont, h);
476    wait_for_condition();
477    // lwip_mutex_unlock();
478
479    free(dir);
480
481    if (h->fh.data_len > 0) {
482        *rethandle = h;
483        return SYS_ERR_OK;
484    } else {
485        free(h);
486        return FS_ERR_NOTFOUND; // XXX
487    }
488}
489
490static errval_t opendir(void *st, const char *path, vfs_handle_t *rethandle)
491{
492    struct nfs_state *nfs = st;
493    struct nfs_handle *h = malloc(sizeof(struct nfs_handle));
494    assert(h != NULL);
495
496    h->isdir = true;
497    h->u.dir.readdir_result = NULL;
498    h->u.dir.readdir_next = NULL;
499    h->u.dir.readdir_prev = NULL;
500    h->nfs = nfs;
501    h->fh = NULL_NFS_FH;
502#ifdef ASYNC_WRITES
503    h->inflight = 0;
504#endif
505
506    // skip leading '/'s
507    while (*path == VFS_PATH_SEP) {
508        path++;
509    }
510
511    // short-circuit lookup of root dir
512    if (*path == '\0') {
513        nfs_copyfh(&h->fh, nfs->rootfh);
514        h->type = NF3DIR;
515        *rethandle = h;
516        return SYS_ERR_OK;
517    }
518
519    // lwip_mutex_lock();
520    initiate_resolve(nfs, path, open_resolve_cont, h);
521    wait_for_condition();
522    // lwip_mutex_unlock();
523
524    if (h->fh.data_len > 0 && h->type == NF3DIR) {
525        *rethandle = h;
526        return SYS_ERR_OK;
527    } else {
528        free(h);
529        return FS_ERR_NOTFOUND;
530    }
531}
532
533static errval_t read(void *st, vfs_handle_t inhandle, void *buffer,
534        size_t bytes, size_t *bytes_read)
535{
536    struct nfs_state *nfs = st;
537    struct nfs_handle *h = inhandle;
538    assert(h != NULL);
539    errval_t e;
540
541    assert(!h->isdir);
542
543    // set up the handle
544    struct nfs_file_io_handle fh;
545    memset(&fh, 0, sizeof(struct nfs_file_io_handle));
546
547    fh.data = buffer;
548    fh.size = bytes;
549    fh.offset = h->u.file.pos;
550    fh.status = NFS3_OK;
551    fh.handle = h->fh;
552    fh.chunks_in_progress = 0;
553
554    // lwip_mutex_lock();
555
556    // start a parallel load of the file, wait for it to complete
557    int chunks = 0;
558    while (fh.chunk_pos < fh.size && chunks < MAX_NFS_READ_CHUNKS) {
559//    while (fh.chunk_pos < fh.size) {
560        struct nfs_file_parallel_io_handle *pfh =
561            malloc(sizeof(struct nfs_file_parallel_io_handle));
562
563        pfh->fh = &fh;
564        pfh->chunk_start = fh.chunk_pos;
565        pfh->chunk_size = MIN(MAX_NFS_READ_BYTES, fh.size - pfh->chunk_start);
566        fh.chunk_pos += pfh->chunk_size;
567
568        fh.chunks_in_progress++;
569        e = nfs_read(nfs->client, fh.handle, fh.offset + pfh->chunk_start,
570                     pfh->chunk_size, read_callback, pfh);
571
572        if (e == LWIP_ERR_MEM) { // internal resource limit in lwip?
573            printf("read: error in nfs_read ran out of mem!!!\n");
574            printf("read: error chunks %d in progress %d!!!\n",
575                    chunks, (int)fh.chunks_in_progress);
576            fh.chunk_pos -= pfh->chunk_size;
577            free(pfh);
578            break;
579        }
580        assert(e == SYS_ERR_OK);
581        chunks++;
582#ifdef NONBLOCKING_NFS_READ
583        check_and_handle_other_events();
584#endif // NONBLOCKING_NFS_READ
585    }
586    wait_for_condition();
587
588    // lwip_mutex_unlock();
589
590    // check result
591    if (fh.status != NFS3_OK) {
592  //      printf("read:vfs_nfs: fh.status issue %u\n", fh.status);
593        return nfsstat_to_errval(fh.status);
594    }
595
596    assert(fh.size <= bytes);
597    h->u.file.pos += fh.size;
598    *bytes_read = fh.size;
599
600    if (fh.size == 0) {
601        /* XXX: assuming this means EOF, but we really do know from NFS */
602/*        printf("read:vfs_nfs: EOF marking %"PRIuPTR" < %"PRIuPTR","
603                "parallel NFS chunks [%u]\n",
604                fh.size, bytes, MAX_NFS_READ_CHUNKS);
605*/
606        return VFS_ERR_EOF;
607    } else {
608        return SYS_ERR_OK;
609    }
610}
611
612static errval_t write(void *st, vfs_handle_t handle, const void *buffer,
613                      size_t bytes, size_t *bytes_written)
614{
615    struct nfs_state *nfs = st;
616    struct nfs_handle *h = handle;
617    assert(h != NULL);
618    errval_t e;
619
620    #if 0
621    if((__builtin_return_address(2) < (void *)fclose ||
622        __builtin_return_address(2) > (void *)memcpy) &&
623       (__builtin_return_address(3) < (void *)fclose ||
624        __builtin_return_address(3) > (void *)memcpy) &&
625       (__builtin_return_address(4) < (void *)fclose ||
626        __builtin_return_address(4) > (void *)memcpy)) {
627        printf("vfs_nfs->write called not from fclose()! %p, %p, %p, %p\n",
628               __builtin_return_address(0),
629               __builtin_return_address(1),
630               __builtin_return_address(2),
631               __builtin_return_address(3));
632    }
633    #endif
634
635    assert(!h->isdir);
636
637    // set up the handle
638#ifndef ASYNC_WRITES
639    struct nfs_file_io_handle the_fh;
640    struct nfs_file_io_handle *fh = &the_fh;
641#else
642    struct nfs_file_io_handle *fh = malloc(sizeof(struct nfs_file_io_handle));
643    assert(fh != NULL);
644#endif
645    memset(fh, 0, sizeof(struct nfs_file_io_handle));
646
647#ifdef ASYNC_WRITES
648    fh->data = malloc(bytes);
649    assert(fh->data != NULL);
650    memcpy(fh->data, buffer, bytes);
651    h->inflight++;
652#else
653    fh->data = (void *)buffer;
654#endif
655    fh->size = bytes;
656    fh->offset = h->u.file.pos;
657    fh->status = NFS3_OK;
658    fh->handle = h->fh;
659    fh->back_fh = h;
660
661    // lwip_mutex_lock();
662
663    // start a parallel write of the file, wait for it to complete
664    int chunks = 0;
665    do {
666        struct nfs_file_parallel_io_handle *pfh =
667            malloc(sizeof(struct nfs_file_parallel_io_handle));
668        pfh->fh = fh;
669        pfh->fh->chunks_in_progress = 0;
670        pfh->chunk_start = fh->chunk_pos;
671        pfh->chunk_size = MIN(MAX_NFS_WRITE_BYTES, fh->size - pfh->chunk_start);
672        fh->chunk_pos += pfh->chunk_size;
673        e = nfs_write(nfs->client, fh->handle, fh->offset + pfh->chunk_start,
674                      (char *)fh->data + pfh->chunk_start, pfh->chunk_size,
675                      NFS_WRITE_STABILITY, write_callback, pfh);
676        assert(e == SYS_ERR_OK);
677        chunks++;
678    } while (fh->chunk_pos < fh->size && chunks < MAX_NFS_WRITE_CHUNKS);
679#ifndef ASYNC_WRITES
680    wait_for_condition();
681#endif
682
683    // lwip_mutex_unlock();
684
685#ifndef ASYNC_WRITES
686    // check result
687    if (fh->status != NFS3_OK && fh->status != NFS3ERR_STALE) {
688        printf("NFS Error: %d\n", fh->status);
689        return nfsstat_to_errval(fh->status);
690    }
691
692    assert(fh->size <= bytes);
693    h->u.file.pos += fh->size;
694    *bytes_written = fh->size;
695#else
696    // This always assumes it succeeded. We'll see failures at file close time.
697    h->u.file.pos += bytes;
698    *bytes_written = bytes;
699#endif
700
701    return SYS_ERR_OK;
702}
703
704
705static void setattr_callback(void *arg, struct nfs_client *client,
706                             SETATTR3res *result)
707{
708    assert(result != NULL);
709
710    xdr_SETATTR3res(&xdr_free, result);
711    assert(result->status == NFS3_OK);
712
713    signal_condition();
714}
715
716
717static errval_t nfs_truncate(void *st, vfs_handle_t handle, size_t bytes)
718{
719    struct nfs_handle *h = handle;
720
721    assert(!h->isdir);
722
723    struct nfs_state *nfs = st;
724    assert(h != NULL);
725    errval_t e;
726
727    // lwip_mutex_lock();
728    // We only set the size field for now
729
730    sattr3 new_attributes;
731    new_attributes.mode.set_it = FALSE;
732    new_attributes.uid.set_it = FALSE;
733    new_attributes.gid.set_it = FALSE;
734    new_attributes.atime.set_it = FALSE;
735    new_attributes.mtime.set_it = FALSE;
736    new_attributes.size.set_it = TRUE;
737    new_attributes.size.set_size3_u.size = bytes;
738
739
740    e = nfs_setattr(nfs->client, h->fh,
741                    new_attributes, false,
742                    setattr_callback, NULL);
743    assert(e == SYS_ERR_OK);
744    wait_for_condition();
745    // lwip_mutex_unlock();
746
747    return SYS_ERR_OK;
748}
749
750static enum vfs_filetype nfs_type_to_vfs_type(enum ftype3 type)
751{
752    switch(type) {
753    case NF3DIR:
754        return VFS_DIRECTORY;
755    case NF3REG:
756        return VFS_FILE;
757    default:
758        return VFS_FILE; // XXX
759    }
760}
761
762static void getattr_callback(void *arg, struct nfs_client *client,
763                             GETATTR3res *result)
764{
765    struct vfs_fileinfo *info = arg;
766
767    assert(result != NULL);
768    if (result->status == NFS3_OK) {
769        fattr3 *res = &result->GETATTR3res_u.resok.obj_attributes;
770
771        info->type = nfs_type_to_vfs_type(res->type);
772        info->size = res->size;
773    } else {
774        // XXX: no error reporting!
775        printf("GETATTR Error: %d\n", result->status);
776        info->type = -1;
777        info->size = -1;
778    }
779
780    xdr_GETATTR3res(&xdr_free, result);
781
782    signal_condition();
783}
784
785static errval_t tell(void *st, vfs_handle_t handle, size_t *pos)
786{
787    struct nfs_handle *h = handle;
788    assert(!h->isdir);
789    *pos = h->u.file.pos;
790    return SYS_ERR_OK;
791}
792
793static errval_t stat(void *st, vfs_handle_t inhandle, struct vfs_fileinfo *info)
794{
795    struct nfs_state *nfs = st;
796    struct nfs_handle *h = inhandle;
797    assert(h != NULL);
798    errval_t e;
799
800    // lwip_mutex_lock();
801    e = nfs_getattr(nfs->client, h->fh, getattr_callback, info);
802    assert(e == SYS_ERR_OK);
803    wait_for_condition();
804    // lwip_mutex_unlock();
805
806    assert(h->isdir == (info->type == VFS_DIRECTORY));
807
808    return SYS_ERR_OK;
809}
810
811static errval_t seek(void *st, vfs_handle_t handle, enum vfs_seekpos whence,
812                     off_t offset)
813{
814    struct nfs_handle *h = handle;
815    struct vfs_fileinfo info;
816    errval_t err;
817
818    assert(!h->isdir);
819
820    switch (whence) {
821    case VFS_SEEK_SET:
822        assert(offset >= 0);
823        h->u.file.pos = offset;
824        break;
825
826    case VFS_SEEK_CUR:
827        assert(offset >= 0 || -offset <= h->u.file.pos);
828        h->u.file.pos += offset;
829        break;
830
831    case VFS_SEEK_END:
832        err = stat(st, handle, &info);
833        if (err_is_fail(err)) {
834            return err;
835        }
836        assert(offset >= 0 || -offset <= info.size);
837        h->u.file.pos = info.size + offset;
838        break;
839
840    default:
841        USER_PANIC("invalid whence argument to nfs seek");
842    }
843
844    return SYS_ERR_OK;
845}
846
847static void readdir_callback(void *arg, struct nfs_client *client,
848                             READDIR3res *result)
849{
850    struct nfs_handle *h = arg;
851    assert(h->isdir);
852
853    assert(result != NULL && result->status == NFS3_OK); // XXX
854
855    // make copy of result object, as it is allocated on our caller's stack
856    assert(h->u.dir.readdir_result == NULL);
857    h->u.dir.readdir_result = malloc(sizeof(READDIR3res));
858    assert(h->u.dir.readdir_result != NULL);
859    memcpy(h->u.dir.readdir_result, result, sizeof(READDIR3res));
860
861    h->u.dir.readdir_next = result->READDIR3res_u.resok.reply.entries;
862    h->u.dir.readdir_prev = NULL;
863
864    signal_condition();
865}
866
867static void get_info_lookup_cb(void *arg, struct nfs_client *client,
868                               LOOKUP3res *result)
869{
870    assert(result != NULL && result->status == NFS3_OK); // XXX
871
872    LOOKUP3resok *resok = &result->LOOKUP3res_u.resok;
873
874    errval_t e = nfs_getattr(client, resok->object, getattr_callback, arg);
875    assert(err_is_ok(e));
876
877    xdr_LOOKUP3res(&xdr_free, result);
878}
879
/**
 * \brief Return the next entry of an open directory
 *
 * Entries are served from the readdir reply cached in the handle; when the
 * cache is exhausted and the server has not reported EOF, the next batch is
 * requested. The entries "." and ".." are skipped.
 *
 * \param st       The struct nfs_state of the mount
 * \param inhandle Directory handle
 * \param retname  If not NULL, set to a strdup()ed copy of the entry name
 * \param info     If not NULL, filled in via a lookup + getattr of the entry
 *
 * \return SYS_ERR_OK if an entry was returned, FS_ERR_INDEX_BOUNDS at the end
 *         of the directory
 */
static errval_t dir_read_next(void *st, vfs_handle_t inhandle,
                              char **retname, struct vfs_fileinfo *info)
{
    struct nfs_state *nfs = st;
    struct nfs_handle *h = inhandle;
    struct entry3 *entry;
    errval_t e;

    assert(h->isdir);

    // lwip_mutex_lock();

top:
    // do we have a cached result?
    if (h->u.dir.readdir_result != NULL && h->u.dir.readdir_next != NULL) {
        // consume the next cached entry and remember it as the previous one
        entry = h->u.dir.readdir_next;
        h->u.dir.readdir_prev = entry;
        h->u.dir.readdir_next = entry->nextentry;
    } else if (h->u.dir.readdir_result != NULL
               && h->u.dir.readdir_result->READDIR3res_u.resok.reply.eof) {
        // lwip_mutex_unlock();
        return FS_ERR_INDEX_BOUNDS; // end of list
    } else {
        // cache empty or exhausted without EOF: fetch another batch

        if (h->u.dir.readdir_result != NULL) { // subsequent call
            struct READDIR3res *oldresult = h->u.dir.readdir_result;
            struct entry3 *oldentry = h->u.dir.readdir_prev;
            // readdir_callback() asserts that this slot is empty
            h->u.dir.readdir_result = NULL;

            // continue after the last entry handed out; cookie and verifier
            // are read from the old reply, so it is freed only afterwards
            e = nfs_readdir(nfs->client, h->fh, oldentry->cookie,
                            oldresult->READDIR3res_u.resok.cookieverf,
                            readdir_callback, h);
            assert(e == SYS_ERR_OK);

            xdr_READDIR3res(&xdr_free, oldresult);
            free(oldresult);
        } else { // first call
            e = nfs_readdir(nfs->client, h->fh, NFS_READDIR_COOKIE,
                            NFS_READDIR_COOKIEVERF, readdir_callback, h);
            assert(e == SYS_ERR_OK);
        }

        // readdir_callback() refills the cache and signals us
        wait_for_condition();

        entry = h->u.dir.readdir_next;
        h->u.dir.readdir_prev = entry;
        h->u.dir.readdir_next = entry != NULL ? entry->nextentry : NULL;
    }

    if (entry == NULL) {
        // an empty batch is only expected at the end of the directory
        assert(h->u.dir.readdir_result->READDIR3res_u.resok.reply.eof);
        // lwip_mutex_unlock();
        return FS_ERR_INDEX_BOUNDS;
    } else {
        assert(entry->name != NULL);
        if (strcmp(entry->name, ".") == 0 || strcmp(entry->name, "..") == 0) {
            // XXX: hide these from the VFS, because it doesn't understand them
            goto top;
        }
        if (retname != NULL) {
            // the copy is returned to the caller
            *retname = strdup(entry->name);
        }
        if (info != NULL) {
            // initiate a lookup/getattr call to find out this information
            e = nfs_lookup(nfs->client, h->fh, entry->name, get_info_lookup_cb,
                           info);
            assert(e == SYS_ERR_OK);
            wait_for_condition();
        }
        // lwip_mutex_unlock();
        return SYS_ERR_OK;
    }
}
953
954static errval_t close(void *st, vfs_handle_t inhandle)
955{
956    struct nfs_handle *h = inhandle;
957    assert(!h->isdir);
958
959#ifdef ASYNC_WRITES
960    while(h->inflight > 0) {
961        wait_for_condition();
962    }
963
964    // XXX: Errors ignored for now. Will be reported by handler functions though.
965#endif
966
967    nfs_freefh(h->fh);
968    free(h);
969    return SYS_ERR_OK;
970}
971
972static errval_t closedir(void *st, vfs_handle_t inhandle)
973{
974    struct nfs_handle *h = inhandle;
975    assert(h->isdir);
976    if (h->u.dir.readdir_result != NULL) {
977        xdr_READDIR3res(&xdr_free, h->u.dir.readdir_result);
978        free(h->u.dir.readdir_result);
979    }
980    nfs_freefh(h->fh);
981    free(h);
982    return SYS_ERR_OK;
983}
984
985static void remove_callback(void *arg, struct nfs_client *client,
986                            REMOVE3res *result)
987{
988    struct nfs_handle *h = arg;
989    assert(result != NULL);
990
991    // XXX: Should find better way to report error
992    h->fh.data_len = result->status;
993
994    signal_condition();
995}
996
997static void remove_resolve_cont(void *st, errval_t err, struct nfs_fh3 fh,
998                                struct fattr3 *fattr)
999{
1000    struct nfs_handle *h = st;
1001
1002    if (err_is_fail(err) || (fattr != NULL && fattr->type != NF3DIR)) {
1003        DEBUG_ERR(err, "failure in remove_resolve_cont");
1004        // FIXME: failed to lookup directory. return meaningful error
1005        signal_condition();
1006        return;
1007    }
1008
1009    errval_t r = nfs_remove(h->nfs->client, fh, h->st, remove_callback, h);
1010    assert(r == SYS_ERR_OK);
1011}
1012
1013static errval_t vfs_nfs_remove(void *st, const char *path)
1014{
1015    struct nfs_state *nfs = st;
1016
1017    // find last path component
1018    char *filename = strrchr(path, VFS_PATH_SEP);
1019    if (filename == NULL) {
1020        return FS_ERR_NOTFOUND;
1021    }
1022
1023    // get directory name in separate nul-terminated string buffer
1024    char *dir = malloc(filename - path + 1);
1025    assert(dir != NULL);
1026    memcpy(dir, path, filename - path);
1027    dir[filename - path] = '\0';
1028
1029    // advance past separator
1030    filename = filename + 1;
1031
1032    struct nfs_handle *h = malloc(sizeof(struct nfs_handle));
1033    assert(h != NULL);
1034
1035    h->isdir = false;
1036    h->u.file.pos = 0;
1037    h->nfs = nfs;
1038    h->fh = NULL_NFS_FH;
1039    h->st = filename;
1040    h->fh.data_len = 0;
1041#ifdef ASYNC_WRITES
1042    h->inflight = 0;
1043#endif
1044
1045    // lwip_mutex_lock();
1046    initiate_resolve(nfs, dir, remove_resolve_cont, h);
1047    wait_for_condition();
1048    // lwip_mutex_unlock();
1049
1050    size_t err = h->fh.data_len;
1051    free(dir);
1052    free(h);
1053
1054    switch(err) {
1055    case NFS3_OK:
1056        return SYS_ERR_OK;
1057
1058    case NFS3ERR_NOENT:
1059        return FS_ERR_NOTFOUND;
1060
1061    default:
1062        // XXX: Unknown error
1063        return NFS_ERR_TRANSPORT;
1064    }
1065}
1066
1067struct mkdir_state {
1068    struct nfs_client *client;
1069    const char *dirname;
1070    errval_t err;
1071};
1072
1073static void mkdir_callback(void *arg, struct nfs_client *client,
1074                           MKDIR3res *result)
1075{
1076    struct mkdir_state *st = arg;
1077
1078    assert(result != NULL);
1079
1080    st->err = nfsstat_to_errval(result->status);
1081
1082    signal_condition();
1083}
1084
1085static void mkdir_resolve_cont(void *st, errval_t err, struct nfs_fh3 fh,
1086                               struct fattr3 *fattr)
1087{
1088    struct mkdir_state *s = st;
1089
1090    if (err_is_fail(err) || (fattr != NULL && fattr->type != NF3DIR)) {
1091        // failed to lookup directory
1092        if (err_is_fail(err)) {
1093            s->err = err;
1094        } else {
1095            s->err = FS_ERR_NOTDIR;
1096        }
1097        signal_condition();
1098        return;
1099    }
1100
1101    static struct sattr3 nulattr;
1102    errval_t r = nfs_mkdir(s->client, fh, s->dirname, nulattr, mkdir_callback, s);
1103    if (r != SYS_ERR_OK) { // XXX: proper error handling
1104        debug_printf("error in mkdir_resolve_cont %zd\n", r);
1105    }
1106    assert(r == SYS_ERR_OK);
1107}
1108
1109static errval_t mkdir(void *st, const char *path)
1110{
1111    struct nfs_state *nfs = st;
1112
1113    // find last path component
1114    char *dirname = strrchr(path, VFS_PATH_SEP);
1115    if (dirname == NULL) {
1116        return FS_ERR_NOTFOUND;
1117    }
1118
1119    // get directory name in separate nul-terminated string buffer
1120    char *parent = malloc(dirname - path + 1);
1121    assert(parent != NULL);
1122    memcpy(parent, path, dirname - path);
1123    parent[dirname - path] = '\0';
1124
1125    // advance past separator
1126    dirname = dirname + 1;
1127
1128    struct mkdir_state state = {
1129        .dirname = dirname,
1130        .client = nfs->client,
1131        .err = SYS_ERR_OK,
1132    };
1133
1134    // lwip_mutex_lock();
1135    initiate_resolve(nfs, parent, mkdir_resolve_cont, &state);
1136    wait_for_condition();
1137    // lwip_mutex_unlock();
1138
1139    free(parent);
1140
1141    return state.err;
1142}
1143
1144#ifdef WITH_BUFFER_CACHE
1145
1146static errval_t get_bcache_key(void *st, vfs_handle_t inhandle,
1147                               char **retkey, size_t *keylen, size_t *retoffset)
1148{
1149    struct nfs_handle *h = inhandle;
1150    assert(h != NULL);
1151    size_t filepos = h->u.file.pos;
1152
1153    assert(!h->isdir);
1154
1155    size_t blockid = filepos / BUFFER_CACHE_BLOCK_SIZE;
1156    *retoffset = filepos % BUFFER_CACHE_BLOCK_SIZE;
1157
1158#if 0
1159    // XXX: Incredibly slow way to generate a looong hash key
1160    *retkey = malloc(200);
1161    *retkey[0] = '\0';
1162    char str[20];
1163    for(int i = 0; i < h->fh.data_len; i++) {
1164        snprintf(str, 20, "%02x", h->fh.data_val[i]);
1165        strcat(*retkey, str);
1166        assert(strlen(*retkey) < 200);
1167    }
1168    snprintf(str, 20, "/%lx", blockid);
1169    strcat(*retkey, str);
1170#else
1171    *keylen = h->fh.data_len + sizeof(blockid);
1172    *retkey = malloc(*keylen);
1173    assert(*retkey != NULL);
1174    memcpy(*retkey, h->fh.data_val, h->fh.data_len);
1175    memcpy(&(*retkey)[h->fh.data_len], &blockid, sizeof(blockid));
1176#endif
1177
1178    return SYS_ERR_OK;
1179}
1180
1181static errval_t read_block(void *st, vfs_handle_t inhandle, void *buffer,
1182                           size_t *bytes_read)
1183{
1184    struct nfs_state *nfs = st;
1185    struct nfs_handle *h = inhandle;
1186    assert(h != NULL);
1187    errval_t e;
1188
1189    assert(!h->isdir);
1190
1191    // set up the handle
1192    struct nfs_file_io_handle fh;
1193    memset(&fh, 0, sizeof(struct nfs_file_io_handle));
1194
1195    fh.data = buffer;
1196    fh.size = BUFFER_CACHE_BLOCK_SIZE;
1197    fh.offset = (h->u.file.pos / BUFFER_CACHE_BLOCK_SIZE) * BUFFER_CACHE_BLOCK_SIZE;
1198    fh.status = NFS3_OK;
1199    fh.handle = h->fh;
1200    fh.chunks_in_progress = 0;
1201
1202    // lwip_mutex_lock();
1203
1204    // start a parallel load of the file, wait for it to complete
1205    int chunks = 0;
1206    while (fh.chunk_pos < fh.size && chunks < MAX_NFS_READ_CHUNKS) {
1207//    while (fh.chunk_pos < fh.size) {
1208        struct nfs_file_parallel_io_handle *pfh =
1209            malloc(sizeof(struct nfs_file_parallel_io_handle));
1210
1211        pfh->fh = &fh;
1212        pfh->chunk_start = fh.chunk_pos;
1213        pfh->chunk_size = MIN(MAX_NFS_READ_BYTES, fh.size - pfh->chunk_start);
1214        fh.chunk_pos += pfh->chunk_size;
1215        fh.chunks_in_progress++;
1216        e = nfs_read(nfs->client, fh.handle, fh.offset + pfh->chunk_start,
1217                     pfh->chunk_size, read_callback, pfh);
1218        if (e == ERR_MEM) { // internal resource limit in lwip?
1219            fh.chunk_pos -= pfh->chunk_size;
1220            free(pfh);
1221            break;
1222        }
1223        assert(e == SYS_ERR_OK);
1224        chunks++;
1225    } // end while
1226    wait_for_condition();
1227
1228    // lwip_mutex_unlock();
1229
1230    // check result
1231    if (fh.status != NFS3_OK && fh.status != NFS3ERR_STALE) {
1232        return nfsstat_to_errval(fh.status);
1233    }
1234
1235    assert(fh.size <= BUFFER_CACHE_BLOCK_SIZE);
1236    *bytes_read = fh.size;
1237
1238    if (fh.size < BUFFER_CACHE_BLOCK_SIZE) {
1239        /* XXX: assuming this means EOF, but we really do know from NFS */
1240        return VFS_ERR_EOF;
1241    } else {
1242        return SYS_ERR_OK;
1243    }
1244}
1245
#if 0
/*
 * Disabled: block-granular write path for the buffer cache. Kept compiled out;
 * the matching .write_block entry in the ops table is commented out as well.
 *
 * Writes up to `bytes` bytes from `buffer` at the start of the block that
 * contains the handle's file position, using at most MAX_NFS_WRITE_CHUNKS
 * parallel WRITE requests, then advances the file position by the final
 * transfer size and reports it through `bytes_written`.
 */
static errval_t write_block(void *st, vfs_handle_t handle, const void *buffer,
                            size_t bytes, size_t *bytes_written)
{
    struct nfs_state *nfs = st;
    struct nfs_handle *h = handle;
    assert(h != NULL);
    assert(!h->isdir);
    assert(bytes <= BUFFER_CACHE_BLOCK_SIZE);

    // describe the whole transfer
    struct nfs_file_io_handle io;
    memset(&io, 0, sizeof(io));
    io.data = (void *)buffer;
    io.size = bytes;
    io.offset = (h->u.file.pos / BUFFER_CACHE_BLOCK_SIZE) * BUFFER_CACHE_BLOCK_SIZE;
    io.status = NFS3_OK;
    io.handle = h->fh;

    lwip_mutex_lock();

    // issue the chunks (always at least one), then wait for completion
    int issued = 0;
    do {
        struct nfs_file_parallel_io_handle *chunk =
            malloc(sizeof(struct nfs_file_parallel_io_handle));
        chunk->fh = &io;
        chunk->chunk_start = io.chunk_pos;
        chunk->chunk_size = MIN(MAX_NFS_WRITE_BYTES, io.size - chunk->chunk_start);
        io.chunk_pos += chunk->chunk_size;

        errval_t e = nfs_write(nfs->client, io.handle,
                               io.offset + chunk->chunk_start,
                               (char *)io.data + chunk->chunk_start,
                               chunk->chunk_size, NFS_WRITE_STABILITY,
                               write_callback, chunk);
        assert(e == SYS_ERR_OK);
        issued++;
    } while (io.chunk_pos < io.size && issued < MAX_NFS_WRITE_CHUNKS);
    wait_for_condition();

    lwip_mutex_unlock();

    // check result
    if (io.status != NFS3_OK) {
        return nfsstat_to_errval(io.status);
    }

    assert(io.size <= bytes);
    h->u.file.pos += io.size;
    *bytes_written = io.size;

    return SYS_ERR_OK;
}
#endif
1302
1303#endif
1304
1305static void
1306mount_callback (void *arg, struct nfs_client *client, enum mountstat3 mountstat,
1307                struct nfs_fh3 fhandle)
1308{
1309    struct nfs_state *st = arg;
1310
1311    st->mountstat = mountstat;
1312
1313    // save the root dir handle
1314    nfs_copyfh(&st->rootfh, fhandle);
1315    // signal the waiting code to continue execution
1316    signal_condition();
1317}
1318
1319static struct vfs_ops nfsops = {
1320    .open = open,
1321    .create = create,
1322    .read = read,
1323    .write = write,
1324    .seek = seek,
1325    .truncate = nfs_truncate,
1326    .tell = tell,
1327    .stat = stat,
1328    .close = close,
1329    .opendir = opendir,
1330    .dir_read_next = dir_read_next,
1331    .closedir = closedir,
1332    .remove = vfs_nfs_remove,
1333    .mkdir = mkdir,
1334    //.rmdir = rmdir,
1335
1336#ifdef WITH_BUFFER_CACHE
1337    .get_bcache_key = get_bcache_key,
1338    .read_block = read_block,
1339    //.write_block = write_block,
1340#endif
1341};
1342
1343errval_t vfs_nfs_mount(const char *uri, void **retst, struct vfs_ops **retops)
1344{
1345    // skip over protocol part of URI
1346    errval_t err;
1347    char *host = strstr(uri, "://");
1348    if (host == NULL) {
1349        return VFS_ERR_BAD_URI;
1350    }
1351    host += 3;
1352
1353    // locate next '/', assume everything before that is an IP address
1354    char *path = strchr(host, '/');
1355    if (path == NULL) {
1356        return VFS_ERR_BAD_URI;
1357    }
1358
1359    char host_copy[path - host + 1];
1360    memcpy(host_copy, host, path - host);
1361    host_copy[path - host] = '\0';
1362
1363    struct in_addr server;
1364    if (inet_aton(host_copy, &server) == 0) {
1365        printf("Invalid host IP: %s\n", host_copy);
1366        return VFS_ERR_BAD_URI;
1367    }
1368
1369    // init stack if needed
1370    static bool stack_inited;
1371    if (!stack_inited) {
1372        // lwip_init_auto();
1373        err = net_sockets_init();
1374        if (err_is_fail(err)) {
1375            return err;
1376        }
1377        stack_inited = true;
1378    }
1379
1380    struct nfs_state *st = malloc(sizeof(struct nfs_state));
1381    assert(st != NULL);
1382
1383    // lwip_mutex_lock();
1384    st->client = nfs_mount(server, path, mount_callback, st);
1385    assert(st->client != NULL);
1386    wait_for_condition();
1387    // lwip_mutex_unlock();
1388
1389    if (st->mountstat == MNT3_OK) {
1390        *retst = st;
1391        *retops = &nfsops;
1392
1393#ifdef WITH_BUFFER_CACHE
1394        return buffer_cache_enable(retst, retops);
1395#else
1396        return SYS_ERR_OK;
1397#endif
1398    } else {
1399        errval_t ret = mountstat_to_errval(st->mountstat);
1400        free(st);
1401        return ret;
1402    }
1403}
1404