1/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
3 *
4 *  Copyright (C) 2000 Stelias Computing, Inc.
5 *  Copyright (C) 2000 Red Hat, Inc.
6 *  Copyright (C) 2000 TurboLinux, Inc.
7 *  Copyright (C) 2000 Los Alamos National Laboratory.
8 *  Copyright (C) 2000, 2001 Tacit Networks, Inc.
9 *  Copyright (C) 2000 Peter J. Braam
10 *  Copyright (C) 2001 Mountain View Data, Inc.
11 *  Copyright (C) 2001 Cluster File Systems, Inc.
12 *
13 *   This file is part of InterMezzo, http://www.inter-mezzo.org.
14 *
15 *   InterMezzo is free software; you can redistribute it and/or
16 *   modify it under the terms of version 2 of the GNU General Public
17 *   License as published by the Free Software Foundation.
18 *
19 *   InterMezzo is distributed in the hope that it will be useful,
20 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
21 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22 *   GNU General Public License for more details.
23 *
24 *   You should have received a copy of the GNU General Public License
25 *   along with InterMezzo; if not, write to the Free Software
26 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 *
28 *  This file manages file I/O
29 *
30 */
31
32#include <stdarg.h>
33
34#include <asm/bitops.h>
35#include <asm/uaccess.h>
36#include <asm/system.h>
37
38#include <linux/errno.h>
39#include <linux/fs.h>
40#include <linux/ext2_fs.h>
41#include <linux/slab.h>
42#include <linux/vmalloc.h>
43#include <linux/sched.h>
44#include <linux/stat.h>
45#include <linux/string.h>
46#include <linux/locks.h>
47#include <linux/blkdev.h>
48#include <linux/init.h>
49#include <linux/smp_lock.h>
50#define __NO_VERSION__
51#include <linux/module.h>
52
53#include <linux/intermezzo_fs.h>
54#include <linux/intermezzo_psdev.h>
55#include <linux/fsfilter.h>
56/*
57 * these are initialized in super.c
58 */
59extern int presto_permission(struct inode *inode, int mask);
60
61
62static int presto_open_upcall(int minor, struct dentry *de)
63{
64        int rc;
65        char *path, *buffer;
66        struct presto_file_set *fset;
67        int pathlen;
68        struct lento_vfs_context info;
69        struct presto_dentry_data *dd = presto_d2d(de);
70
71        PRESTO_ALLOC(buffer, PAGE_SIZE);
72        if ( !buffer ) {
73                CERROR("PRESTO: out of memory!\n");
74                return -ENOMEM;
75        }
76        fset = presto_fset(de);
77        path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
78        pathlen = MYPATHLEN(buffer, path);
79
80        CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
81        if (dd->remote_ino == 0) {
82                rc = presto_get_fileid(minor, fset, de);
83        }
84        memset (&info, 0, sizeof(info));
85        if (dd->remote_ino > 0) {
86                info.remote_ino = dd->remote_ino;
87                info.remote_generation = dd->remote_generation;
88        } else
89                CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
90                       dd->remote_ino);
91
92        rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
93        PRESTO_FREE(buffer, PAGE_SIZE);
94        return rc;
95}
96
97static inline int open_check_dod(struct file *file,
98                                 struct presto_file_set *fset)
99{
100        int gen, is_iopen = 0, minor;
101        struct presto_cache *cache = fset->fset_cache;
102        ino_t inum;
103
104        minor = presto_c2m(cache);
105
106        if ( ISLENTO(minor) ) {
107                CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
108                return 0;
109        }
110
111        /* Files are only ever opened by inode during backfetches, when by
112         * definition we have the authoritative copy of the data.  No DOD. */
113        is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
114
115        if (is_iopen) {
116                CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
117                return 0;
118        }
119
120        if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
121                CDEBUG(D_CACHE, "fileset not on demand.\n");
122                return 0;
123        }
124
125        if (file->f_flags & O_TRUNC) {
126                CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
127                return 0;
128        }
129
130        if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
131                CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
132                return 0;
133        }
134
135        if (presto_chk(file->f_dentry, PRESTO_DATA)) {
136                CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
137                return 0;
138        }
139
140        if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
141                CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
142                return 0;
143        }
144
145        return 1;
146}
147
148static int presto_file_open(struct inode *inode, struct file *file)
149{
150        int rc = 0;
151        struct file_operations *fops;
152        struct presto_cache *cache;
153        struct presto_file_set *fset;
154        struct presto_file_data *fdata;
155        int writable = (file->f_flags & (O_RDWR | O_WRONLY));
156        int minor, i;
157
158        ENTRY;
159
160        if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
161                EXIT;
162                return -EBADF;
163        }
164
165        minor = presto_c2m(cache);
166
167        CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
168               presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
169               ISLENTO(minor));
170
171        if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
172                                 file->f_flags & O_WRONLY)) {
173                CDEBUG(D_CACHE, "calling presto_get_permit\n");
174                if ( presto_get_permit(inode) < 0 ) {
175                        EXIT;
176                        return -EROFS;
177                }
178                presto_put_permit(inode);
179        }
180
181        if (open_check_dod(file, fset)) {
182                CDEBUG(D_CACHE, "presto_open_upcall\n");
183                CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
184                presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
185                rc = presto_open_upcall(minor, file->f_dentry);
186                if (rc) {
187                        EXIT;
188                        CERROR("%s: returning error %d\n", __FUNCTION__, rc);
189                        return rc;
190                }
191
192        }
193
194        /* file was truncated upon open: do not refetch */
195        if (file->f_flags & O_TRUNC) {
196                CDEBUG(D_CACHE, "setting DATA, ATTR\n");
197                presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
198        }
199
200        fops = filter_c2cffops(cache->cache_filter);
201        if ( fops->open ) {
202                CDEBUG(D_CACHE, "calling fs open\n");
203                rc = fops->open(inode, file);
204
205                if (rc) {
206                        EXIT;
207                        return rc;
208                }
209        }
210
211        if (writable) {
212                PRESTO_ALLOC(fdata, sizeof(*fdata));
213                if (!fdata) {
214                        EXIT;
215                        return -ENOMEM;
216                }
217                fdata->fd_do_lml = 0;
218                fdata->fd_bytes_written = 0;
219                fdata->fd_fsuid = current->fsuid;
220                fdata->fd_fsgid = current->fsgid;
221                fdata->fd_mode = file->f_dentry->d_inode->i_mode;
222                fdata->fd_uid = file->f_dentry->d_inode->i_uid;
223                fdata->fd_gid = file->f_dentry->d_inode->i_gid;
224                fdata->fd_ngroups = current->ngroups;
225                for (i=0 ; i < current->ngroups ; i++)
226                        fdata->fd_groups[i] = current->groups[i];
227                if (!ISLENTO(minor))
228                        fdata->fd_info.flags = LENTO_FL_KML;
229                else {
230                        /* this is for the case of DOD,
231                           reint_close will adjust flags if needed */
232                        fdata->fd_info.flags = 0;
233                }
234
235                presto_getversion(&fdata->fd_version, inode);
236                file->private_data = fdata;
237        } else {
238                file->private_data = NULL;
239        }
240
241        EXIT;
242        return 0;
243}
244
245int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
246{
247        struct presto_file_data *fdata =
248                (struct presto_file_data *) file->private_data;
249
250        if (!fdata) {
251                EXIT;
252                return -EINVAL;
253        }
254
255        memcpy(&fdata->fd_info, info, sizeof(*info));
256        EXIT;
257        return 0;
258}
259
260
261static int presto_file_release(struct inode *inode, struct file *file)
262{
263        int rc;
264        struct file_operations *fops;
265        struct presto_cache *cache;
266        struct presto_file_set *fset;
267        struct presto_file_data *fdata =
268                (struct presto_file_data *)file->private_data;
269        ENTRY;
270
271        rc = presto_prep(file->f_dentry, &cache, &fset);
272        if ( rc ) {
273                EXIT;
274                return rc;
275        }
276
277        fops = filter_c2cffops(cache->cache_filter);
278        if (fops && fops->release)
279                rc = fops->release(inode, file);
280
281        CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
282               ISLENTO(cache->cache_psdev->uc_minor),
283               cache->cache_psdev->uc_minor, rc, fdata);
284
285        /* this file was modified: ignore close errors, write KML */
286        if (fdata && fdata->fd_do_lml) {
287                if ( presto_get_permit(inode) < 0 ) {
288                        EXIT;
289                        return -EROFS;
290                }
291
292                fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
293                rc = presto_do_close(fset, file);
294                presto_put_permit(inode);
295        }
296
297        if (!rc && fdata) {
298                PRESTO_FREE(fdata, sizeof(*fdata));
299                file->private_data = NULL;
300        }
301
302        EXIT;
303        return rc;
304}
305
306static void presto_apply_write_policy(struct file *file,
307                                      struct presto_file_set *fset, loff_t res)
308{
309        struct presto_file_data *fdata =
310                (struct presto_file_data *)file->private_data;
311        struct presto_cache *cache = fset->fset_cache;
312        struct presto_version new_file_ver;
313        int error;
314        struct rec_info rec;
315
316
317        if ((fset->fset_flags & FSET_JCLOSE_ON_WRITE) &&
318            (!ISLENTO(cache->cache_psdev->uc_minor))) {
319                fdata->fd_bytes_written += res;
320
321                if (fdata->fd_bytes_written >= fset->fset_file_maxio) {
322                        presto_getversion(&new_file_ver,
323                                          file->f_dentry->d_inode);
324                        /* This is really heavy weight and should be fixed
325                           ASAP. At most we should be recording the number
326                           of bytes written and not locking the kernel,
327                           wait for permits, etc, on the write path. SHP
328                        */
329                        lock_kernel();
330                        if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
331                                EXIT;
332                                /* we must be disconnected, not to worry */
333                                unlock_kernel();
334                                return;
335                        }
336                        error = presto_journal_close(&rec, fset, file,
337                                                     file->f_dentry,
338                                                     &fdata->fd_version,
339                                                     &new_file_ver);
340                        presto_put_permit(file->f_dentry->d_inode);
341                        unlock_kernel();
342                        if ( error ) {
343                                CERROR("presto_close: cannot journal close\n");
344                                /* panic(); */
345                                return;
346                        }
347                        fdata->fd_bytes_written = 0;
348                }
349        }
350}
351
352static ssize_t presto_file_write(struct file *file, const char *buf,
353                                 size_t size, loff_t *off)
354{
355        struct rec_info rec;
356        int error;
357        struct presto_cache *cache;
358        struct presto_file_set *fset;
359        struct file_operations *fops;
360        ssize_t res;
361        int do_lml_here;
362        void *handle = NULL;
363        unsigned long blocks;
364        struct presto_file_data *fdata;
365        loff_t res_size;
366
367        error = presto_prep(file->f_dentry, &cache, &fset);
368        if ( error ) {
369                EXIT;
370                return error;
371        }
372
373        blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
374        res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
375                << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
376
377        error = presto_reserve_space(fset->fset_cache, res_size);
378        CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size);
379        if ( error ) {
380                EXIT;
381                return -ENOSPC;
382        }
383
384        CDEBUG(D_INODE, "islento %d, minor: %d\n",
385               ISLENTO(cache->cache_psdev->uc_minor),
386               cache->cache_psdev->uc_minor);
387
388        read_lock(&fset->fset_lml.fd_lock);
389        fdata = (struct presto_file_data *)file->private_data;
390        do_lml_here = size && (fdata->fd_do_lml == 0) &&
391                !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
392
393        if (do_lml_here)
394                fdata->fd_do_lml = 1;
395        read_unlock(&fset->fset_lml.fd_lock);
396
397        res = 0;
398        if (do_lml_here) {
399                struct presto_version file_version;
400                /* handle different space reqs from file system below! */
401                handle = presto_trans_start(fset, file->f_dentry->d_inode,
402                                            KML_OPCODE_WRITE);
403                if ( IS_ERR(handle) ) {
404                        presto_release_space(fset->fset_cache, res_size);
405                        CERROR("presto_write: no space for transaction\n");
406                        return -ENOSPC;
407                }
408
409                presto_getversion(&file_version, file->f_dentry->d_inode);
410                res = presto_write_lml_close(&rec, fset, file,
411                                             fdata->fd_info.remote_ino,
412                                             fdata->fd_info.remote_generation,
413                                             &fdata->fd_info.remote_version,
414                                             &file_version);
415                fdata->fd_lml_offset = rec.offset;
416                if ( res ) {
417                        CERROR("intermezzo: PANIC failed to write LML\n");
418                        *(int *)0 = 1;
419                        EXIT;
420                        goto exit_write;
421                }
422                presto_trans_commit(fset, handle);
423        }
424
425        fops = filter_c2cffops(cache->cache_filter);
426        res = fops->write(file, buf, size, off);
427        if ( res != size ) {
428                CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res);
429        }
430
431        if ( (res > 0) && fdata )
432                 presto_apply_write_policy(file, fset, res);
433
434 exit_write:
435        presto_release_space(fset->fset_cache, res_size);
436        return res;
437}
438
439struct file_operations presto_file_fops = {
440        .write   = presto_file_write,
441        .open    = presto_file_open,
442        .release = presto_file_release,
443        .ioctl   = presto_ioctl
444};
445
446struct inode_operations presto_file_iops = {
447        .permission   = presto_permission,
448        .setattr      = presto_setattr,
449#ifdef CONFIG_FS_EXT_ATTR
450        .set_ext_attr = presto_set_ext_attr,
451#endif
452};
453
/*
 * Stub: performs no work and ignores both arguments.
 *
 * @fset: unused
 * @file: unused
 *
 * Always returns 0.
 */
int izo_purge_file(struct presto_file_set *fset, char *file)
{
        (void)fset;
        (void)file;

        return 0;
}
458