4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
48 #include "../include/obd.h"
49 #include "../include/linux/lustre_compat25.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
/* Hook used by lustre_fill_super() for client mounts; it is assigned by
 * lustre_register_client_fill_super(). */
56 static int (*client_fill_super)(struct super_block *sb,
57 struct vfsmount *mnt);
/* Hook called from lustre_kill_super() for non-server superblocks; it is
 * assigned by lustre_register_kill_super_cb(). */
59 static void (*kill_super_cb)(struct super_block *sb);
61 /**************** config llog ********************/
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
68 * @param sb The superblock is used by the MGC to write to the local copy of
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
 *
 * The request is an LCFG_LOG_START lustre_cfg handed to the MGC found in
 * the superblock info (lsi_mgc) through obd_process_config(). Two console
 * errors (0x15b and 0x15c) are defined below; both print the MGC name, the
 * log name and rc.
 *
 * NOTE(review): several short lines of this function (the declaration of
 * rc, the allocation of bufs, the conditions guarding the two console
 * messages and the return statement) are not visible in this listing.
 * Confirm against the full file before relying on the details above.
75 int lustre_process_log(struct super_block *sb, char *logname,
76 struct config_llog_instance *cfg)
78 struct lustre_cfg *lcfg;
79 struct lustre_cfg_bufs *bufs;
80 struct lustre_sb_info *lsi = s2lsi(sb);
81 struct obd_device *mgc = lsi->lsi_mgc;
91 /* mgc_process_config */
/* Pack the request: the MGC name goes through bufs_reset, the log name in
 * buffer 1 and sizeof(*cfg) bytes at cfg in buffer 2. Buffer 3 is given
 * &sb with sizeof(sb), i.e. the pointer value itself rather than the
 * super_block contents. */
92 lustre_cfg_bufs_reset(bufs, mgc->obd_name);
93 lustre_cfg_bufs_set_string(bufs, 1, logname);
94 lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
95 lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
96 lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
97 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
98 lustre_cfg_free(lcfg);
/* NOTE(review): the first literal below ends without a trailing space and
 * the second starts with the word failed, so the two concatenate with no
 * space between the quoted log name and that word. The 0x15c message has
 * the space. This is a runtime string and is left untouched here. */
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s'"
104 "failed from the MGS (%d). Make sure this "
105 "client and the MGS are running compatible "
106 "versions of Lustre.\n",
107 mgc->obd_name, logname, rc);
110 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' "
111 "failed (%d). This may be the result of "
112 "communication errors between this node and "
113 "the MGS, a bad configuration, or other "
114 "errors. See the syslog for more "
115 "information.\n", mgc->obd_name, logname,
118 /* class_obd_list(); */
121 EXPORT_SYMBOL(lustre_process_log);
123 /* Stop watching this config log for updates */
/* Sends an LCFG_LOG_END lustre_cfg for @logname to the MGC recorded in the
 * superblock info (lsi_mgc), using an on-stack lustre_cfg_bufs. The log
 * name goes in buffer 1 and sizeof(*cfg) bytes at @cfg in buffer 2.
 * NOTE(review): the declaration of rc, the line between the two buffer-set
 * calls and the return statement are not visible in this listing, so
 * whether @cfg may be NULL cannot be determined from what is shown. */
124 int lustre_end_log(struct super_block *sb, char *logname,
125 struct config_llog_instance *cfg)
127 struct lustre_cfg *lcfg;
128 struct lustre_cfg_bufs bufs;
129 struct lustre_sb_info *lsi = s2lsi(sb);
130 struct obd_device *mgc = lsi->lsi_mgc;
136 /* mgc_process_config */
137 lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
138 lustre_cfg_bufs_set_string(&bufs, 1, logname);
140 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
141 lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
142 rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
143 lustre_cfg_free(lcfg);
146 EXPORT_SYMBOL(lustre_end_log);
148 /**************** obd start *******************/
150 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
151 * lctl (and do for echo cli/srv).
 *
 * Build a lustre_cfg for command @cmd addressed to @cfgname and run it
 * through class_process_config(). @s1..@s4 are placed in string buffers
 * 1..4 and @nid is stored in lcfg_nid; the lustre_cfg is freed again
 * after processing.
 *
 * NOTE(review): the lines between the individual buffer-set calls, the
 * declaration of rc and the return statement are missing from this
 * listing; whether NULL strings are skipped cannot be confirmed from what
 * is shown.
153 int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
154 char *s1, char *s2, char *s3, char *s4)
156 struct lustre_cfg_bufs bufs;
157 struct lustre_cfg * lcfg = NULL;
160 CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
161 cmd, s1, s2, s3, s4);
163 lustre_cfg_bufs_reset(&bufs, cfgname);
165 lustre_cfg_bufs_set_string(&bufs, 1, s1);
167 lustre_cfg_bufs_set_string(&bufs, 2, s2);
169 lustre_cfg_bufs_set_string(&bufs, 3, s3);
171 lustre_cfg_bufs_set_string(&bufs, 4, s4);
173 lcfg = lustre_cfg_new(cmd, &bufs);
174 lcfg->lcfg_nid = nid;
175 rc = class_process_config(lcfg);
176 lustre_cfg_free(lcfg);
179 EXPORT_SYMBOL(do_lcfg);
181 /** Call class_attach and class_setup. These methods in turn call
182 * obd type-specific methods.
 *
 * Issues LCFG_ATTACH for @obdname with @type and @uuid, then LCFG_SETUP
 * with @s1..@s4, both through do_lcfg() with a nid of 0. An attach or
 * setup failure is logged with CERROR, and LCFG_DETACH is issued right
 * after the setup error message.
 *
 * NOTE(review): the tests on rc and the return statements are not visible
 * in this listing.
184 int lustre_start_simple(char *obdname, char *type, char *uuid,
185 char *s1, char *s2, char *s3, char *s4)
188 CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
190 rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
192 CERROR("%s attach error %d\n", obdname, rc);
195 rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
197 CERROR("%s setup error %d\n", obdname, rc);
198 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
203 DEFINE_MUTEX(mgc_start_lock);
205 /** Set up a mgc obd to process startup logs
207 * \param sb [in] super block of the mgc obd
209 * \retval 0 success, otherwise error code
 *
 * The work between the mutex_lock() and mutex_unlock() calls below runs
 * under mgc_start_lock. The MGC obd is named LUSTRE_MGC_OBDNAME followed
 * by the string form of the first MGS nid found. If an obd of that name
 * exists and is not stopping, it is reused and its cl_mgc_refcount is
 * incremented. Otherwise the nid uuids are added with LCFG_ADD_UUID, the
 * obd is started through lustre_start_simple(), its refcount is set to 1
 * and obd_connect() is called.
 *
 * NOTE(review): many short lines (declarations, conditions, gotos, labels
 * and the return) are missing from this listing, so the exact control flow
 * between the steps above cannot be confirmed here.
211 int lustre_start_mgc(struct super_block *sb)
213 struct obd_connect_data *data = NULL;
214 struct lustre_sb_info *lsi = s2lsi(sb);
215 struct obd_device *obd;
216 struct obd_export *exp;
217 struct obd_uuid *uuid;
220 char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
222 int rc = 0, i = 0, j, len;
224 LASSERT(lsi->lsi_lmd);
226 /* Find the first non-lo MGS nid for our MGC name */
227 if (IS_SERVER(lsi)) {
228 /* mount -o mgsnode=nid */
229 ptr = lsi->lsi_lmd->lmd_mgs;
230 if (lsi->lsi_lmd->lmd_mgs &&
231 (class_parse_nid(lsi->lsi_lmd->lmd_mgs, &nid, &ptr) == 0)) {
233 } else if (IS_MGS(lsi)) {
234 lnet_process_id_t id;
235 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
236 if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
243 } else { /* client */
244 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
245 ptr = lsi->lsi_lmd->lmd_dev;
246 if (class_parse_nid(ptr, &nid, &ptr) == 0)
250 CERROR("No valid MGS nids found.\n");
254 mutex_lock(&mgc_start_lock);
/* mgcname holds the prefix plus the nid string plus a NUL (len bytes).
 * NOTE(review): niduuid is len + 2 bytes, which fits the name, an
 * underscore and one hex digit; the sprintf calls below that format i
 * with %x therefore rely on i staying below 16 - confirm a bound exists
 * on the lines not shown. */
256 len = strlen(LUSTRE_MGC_OBDNAME) + strlen(libcfs_nid2str(nid)) + 1;
257 OBD_ALLOC(mgcname, len);
258 OBD_ALLOC(niduuid, len + 2);
259 if (!mgcname || !niduuid) {
263 sprintf(mgcname, "%s%s", LUSTRE_MGC_OBDNAME, libcfs_nid2str(nid));
265 mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
273 obd = class_name2obd(mgcname);
274 if (obd && !obd->obd_stopping) {
277 rc = obd_set_info_async(NULL, obd->obd_self_export,
278 strlen(KEY_MGSSEC), KEY_MGSSEC,
279 strlen(mgssec), mgssec, NULL);
283 /* Re-using an existing MGC */
284 atomic_inc(&obd->u.cli.cl_mgc_refcount);
286 /* IR compatibility check, only for clients */
287 if (lmd_is_client(lsi->lsi_lmd)) {
289 int vallen = sizeof(*data);
290 __u32 *flags = &lsi->lsi_lmd->lmd_flags;
292 rc = obd_get_info(NULL, obd->obd_self_export,
293 strlen(KEY_CONN_DATA), KEY_CONN_DATA,
294 &vallen, data, NULL);
296 has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
/* True when the reused MGC's IMP_RECOV state disagrees with what this
 * mount asks for (LMD_FLG_NOIR clear means recovery is wanted); per the
 * message below, the mount flags are then made to follow the MGC. */
297 if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
298 /* LMD_FLG_NOIR is for test purpose only */
300 "Trying to mount a client with IR setting "
301 "not compatible with current mgc. "
302 "Force to use current mgc setting that is "
304 has_ir ? "enabled" : "disabled");
306 *flags &= ~LMD_FLG_NOIR;
308 *flags |= LMD_FLG_NOIR;
313 /* If we are restarting the MGS, don't try to keep the MGC's
314 old connection, or registration will fail. */
316 CDEBUG(D_MOUNT, "New MGS with live MGC\n");
320 /* Try all connections, but only once (again).
321 We don't want to block another target from starting
322 (using its local copy of the log), but we do want to connect
323 if at all possible. */
325 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
327 rc = obd_set_info_async(NULL, obd->obd_self_export,
328 sizeof(KEY_INIT_RECOV_BACKUP),
329 KEY_INIT_RECOV_BACKUP,
330 sizeof(recov_bk), &recov_bk, NULL);
335 CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
337 /* Add the primary nids for the MGS */
339 sprintf(niduuid, "%s_%x", mgcname, i);
340 if (IS_SERVER(lsi)) {
341 ptr = lsi->lsi_lmd->lmd_mgs;
343 /* Use local nids (including LO) */
344 lnet_process_id_t id;
345 while ((rc = LNetGetId(i++, &id)) != -ENOENT) {
346 rc = do_lcfg(mgcname, id.nid,
347 LCFG_ADD_UUID, niduuid,
351 /* Use mgsnode= nids */
352 /* mount -o mgsnode=nid */
353 if (lsi->lsi_lmd->lmd_mgs) {
354 ptr = lsi->lsi_lmd->lmd_mgs;
355 } else if (class_find_param(ptr, PARAM_MGSNODE,
357 CERROR("No MGS nids given.\n");
361 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
362 rc = do_lcfg(mgcname, nid,
363 LCFG_ADD_UUID, niduuid,
368 } else { /* client */
369 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
370 ptr = lsi->lsi_lmd->lmd_dev;
371 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
372 rc = do_lcfg(mgcname, nid,
373 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
375 /* Stop at the first failover nid */
381 CERROR("No valid MGS nids found.\n");
385 lsi->lsi_lmd->lmd_mgs_failnodes = 1;
387 /* Random uuid for MGC allows easier reconnects */
389 ll_generate_random_uuid(uuidc);
390 class_uuid_unparse(uuidc, uuid);
393 rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
394 (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
395 niduuid, NULL, NULL);
400 /* Add any failover MGS nids */
402 while (ptr && ((*ptr == ':' ||
403 class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
404 /* New failover node */
405 sprintf(niduuid, "%s_%x", mgcname, i);
407 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
409 rc = do_lcfg(mgcname, nid,
410 LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
415 rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
416 niduuid, NULL, NULL, NULL);
423 lsi->lsi_lmd->lmd_mgs_failnodes = i;
425 obd = class_name2obd(mgcname);
427 CERROR("Can't find mgcobd %s\n", mgcname);
432 rc = obd_set_info_async(NULL, obd->obd_self_export,
433 strlen(KEY_MGSSEC), KEY_MGSSEC,
434 strlen(mgssec), mgssec, NULL);
438 /* Keep a refcount of servers/clients who started with "mount",
439 so we know when we can get rid of the mgc. */
440 atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
442 /* We connect to the MGS at setup, and don't disconnect until cleanup */
443 data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
444 OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
445 OBD_CONNECT_LVB_TYPE;
447 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
448 data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
450 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
453 if (lmd_is_client(lsi->lsi_lmd) &&
454 lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
455 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
456 data->ocd_version = LUSTRE_VERSION_CODE;
457 rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
459 CERROR("connect failed %d\n", rc);
463 obd->u.cli.cl_mgc_mgsexp = exp;
466 /* Keep the mgc info in the sb. Note that many lsi's can point
470 mutex_unlock(&mgc_start_lock);
475 OBD_FREE(mgcname, len);
477 OBD_FREE(niduuid, len + 2);
// Drop one reference on the MGC (cl_mgc_refcount). When other references
// remain, only a debug message is visible here; otherwise the MGS export
// is disconnected, the obd is cleaned up with class_manual_cleanup() and
// the nid uuids are deleted. The refcount handling runs under
// mgc_start_lock.
// NOTE(review): the assignment of obd, the early returns and several
// conditions are on lines missing from this listing.
481 static int lustre_stop_mgc(struct super_block *sb)
483 struct lustre_sb_info *lsi = s2lsi(sb);
484 struct obd_device *obd;
485 char *niduuid = NULL, *ptr = NULL;
486 int i, rc = 0, len = 0;
495 mutex_lock(&mgc_start_lock);
496 LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
497 if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
498 /* This is not fatal, every client that stops
499 will call in here. */
500 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
501 atomic_read(&obd->u.cli.cl_mgc_refcount));
506 /* The MGC has no recoverable data in any case.
507 * force shutdown set in umount_begin */
508 obd->obd_no_recov = 1;
510 if (obd->u.cli.cl_mgc_mgsexp) {
511 /* An error is not fatal, if we are unable to send the
512 disconnect mgs ping evictor cleans up the export */
513 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
515 CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
518 /* Save the obdname for cleaning the nid uuids, which are
520 len = strlen(obd->obd_name) + 6;
521 OBD_ALLOC(niduuid, len);
523 strcpy(niduuid, obd->obd_name);
524 ptr = niduuid + strlen(niduuid);
527 rc = class_manual_cleanup(obd);
531 /* Clean the nid uuids */
/* One LCFG_DEL_UUID is issued per index below lmd_mgs_failnodes; ptr points
 * just past the saved obd name inside niduuid, so each sprintf rewrites
 * only the suffix.
 * NOTE(review): do_lcfg is addressed to LUSTRE_MGC_OBDNAME here, and the
 * error text below says MDC although this path handles the MGC - possibly
 * a typo. It is a runtime string and is left untouched. */
537 for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
538 sprintf(ptr, "_%x", i);
539 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
540 niduuid, NULL, NULL, NULL);
542 CERROR("del MDC UUID %s failed: rc = %d\n",
547 OBD_FREE(niduuid, len);
549 /* class_import_put will get rid of the additional connections */
550 mutex_unlock(&mgc_start_lock);
554 /***************** lustre superblock **************/
/* Set up the lustre_sb_info for @sb: allocate its lustre_mount_data
 * (lsi_lmd), zero the exclude count and both recovery times, attach the
 * lsi to the superblock, start lsi_mounts at 1 and default lsi_flags to
 * LSI_UMOUNT_FAILOVER.
 * NOTE(review): the allocation of lsi itself, the failure checks and the
 * return statement are on lines missing from this listing. */
556 struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
558 struct lustre_sb_info *lsi;
563 OBD_ALLOC_PTR(lsi->lsi_lmd);
569 lsi->lsi_lmd->lmd_exclude_count = 0;
570 lsi->lsi_lmd->lmd_recovery_time_soft = 0;
571 lsi->lsi_lmd->lmd_recovery_time_hard = 0;
572 s2lsi_nocast(sb) = lsi;
573 /* we take 1 extra ref for our setup */
574 atomic_set(&lsi->lsi_mounts, 1);
576 /* Default umount style */
577 lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
/* Release the lustre_sb_info attached to @sb. Asserts that lsi_mounts has
 * dropped to 0 and that lsi_llsbi is NULL, frees every string hanging off
 * lsi_lmd (each sized strlen + 1), the exclude array and lmd_params, then
 * the lmd and the lsi, and finally clears the superblock's lsi pointer.
 * NOTE(review): closing braces and the return statement are not visible in
 * this listing. */
582 static int lustre_free_lsi(struct super_block *sb)
584 struct lustre_sb_info *lsi = s2lsi(sb);
586 LASSERT(lsi != NULL);
587 CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
589 /* someone didn't call server_put_mount. */
590 LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
592 if (lsi->lsi_lmd != NULL) {
593 if (lsi->lsi_lmd->lmd_dev != NULL)
594 OBD_FREE(lsi->lsi_lmd->lmd_dev,
595 strlen(lsi->lsi_lmd->lmd_dev) + 1);
596 if (lsi->lsi_lmd->lmd_profile != NULL)
597 OBD_FREE(lsi->lsi_lmd->lmd_profile,
598 strlen(lsi->lsi_lmd->lmd_profile) + 1);
599 if (lsi->lsi_lmd->lmd_mgssec != NULL)
600 OBD_FREE(lsi->lsi_lmd->lmd_mgssec,
601 strlen(lsi->lsi_lmd->lmd_mgssec) + 1);
602 if (lsi->lsi_lmd->lmd_opts != NULL)
603 OBD_FREE(lsi->lsi_lmd->lmd_opts,
604 strlen(lsi->lsi_lmd->lmd_opts) + 1);
605 if (lsi->lsi_lmd->lmd_exclude_count)
606 OBD_FREE(lsi->lsi_lmd->lmd_exclude,
607 sizeof(lsi->lsi_lmd->lmd_exclude[0]) *
608 lsi->lsi_lmd->lmd_exclude_count);
609 if (lsi->lsi_lmd->lmd_mgs != NULL)
610 OBD_FREE(lsi->lsi_lmd->lmd_mgs,
611 strlen(lsi->lsi_lmd->lmd_mgs) + 1);
612 if (lsi->lsi_lmd->lmd_osd_type != NULL)
613 OBD_FREE(lsi->lsi_lmd->lmd_osd_type,
614 strlen(lsi->lsi_lmd->lmd_osd_type) + 1);
/* The literal 4096 has to match the size lmd_parse() passes when it
 * allocates lmd_params. */
615 if (lsi->lsi_lmd->lmd_params != NULL)
616 OBD_FREE(lsi->lsi_lmd->lmd_params, 4096);
618 OBD_FREE(lsi->lsi_lmd, sizeof(*lsi->lsi_lmd));
621 LASSERT(lsi->lsi_llsbi == NULL);
622 OBD_FREE(lsi, sizeof(*lsi));
623 s2lsi_nocast(sb) = NULL;
628 /* The lsi has one reference for every server that is using the disk -
629 e.g. MDT, MGS, and potentially MGC */
/* Drops one lsi_mounts reference. When the count reaches zero on a server
 * that has an OSD export, the dt device reference is put, the export is
 * disconnected and obd_zombie_barrier() is called.
 * NOTE(review): the rest of the zero-count branch and the return
 * statements are on lines missing from this listing. */
630 int lustre_put_lsi(struct super_block *sb)
632 struct lustre_sb_info *lsi = s2lsi(sb);
634 LASSERT(lsi != NULL);
636 CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
637 if (atomic_dec_and_test(&lsi->lsi_mounts)) {
638 if (IS_SERVER(lsi) && lsi->lsi_osd_exp) {
639 lu_device_put(&lsi->lsi_dt_dev->dd_lu_dev);
640 lsi->lsi_osd_exp->exp_obd->obd_lvfs_ctxt.dt = NULL;
641 lsi->lsi_dt_dev = NULL;
642 obd_disconnect(lsi->lsi_osd_exp);
643 /* wait till OSD is gone */
644 obd_zombie_barrier();
653 * <FSNAME><SEPARATOR><TYPE><INDEX>
654 * FSNAME is between 1 and 8 characters (inclusive).
655 * Excluded characters are '/' and ':'
656 * SEPARATOR is either ':' or '-'
657 * TYPE: "OST", "MDT", etc.
658 * INDEX: Hex representation of the index
661 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
662 * @param [in] svname server name including type and index
663 * @param [out] fsname Buffer to copy filesystem name prefix into.
664 * Must have at least 'strlen(fsname) + 1' chars.
665 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
 *
 * Starting at most 8 characters into @svname, the scan moves backwards
 * while the current character is neither '-' nor ':' and the start of the
 * string has not been reached. The text before the stopping point is
 * copied to @fsname (when non-NULL) and NUL-terminated.
 *
 * NOTE(review): the declaration of dash, the error returns and the
 * handling of @endptr are on lines missing from this listing.
668 int server_name2fsname(const char *svname, char *fsname, const char **endptr)
672 dash = svname + strnlen(svname, 8); /* max fsname length is 8 */
673 for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
678 if (fsname != NULL) {
679 strncpy(fsname, svname, dash - svname);
680 fsname[dash - svname] = '\0';
688 EXPORT_SYMBOL(server_name2fsname);
/* server_name2svname(): uses server_name2fsname() to locate the separator
 * in @label and copies the text after it into @svname with strlcpy(); a
 * strlcpy() result of svsize or more (i.e. truncation) is tested for
 * explicitly.
 * NOTE(review): the last parameter line, the error checks and all return
 * statements are missing from this listing. */
691 * Get service name (svname) from string
693 * if endptr isn't NULL it is set to end of fsname *
695 int server_name2svname(const char *label, char *svname, const char **endptr,
701 /* We use server_name2fsname() just for parsing */
702 rc = server_name2fsname(label, NULL, &dash);
709 if (strlcpy(svname, dash + 1, svsize) >= svsize)
714 EXPORT_SYMBOL(server_name2svname);
717 /* Get the index from the obd name.
720 if endptr isn't NULL it is set to end of name */
/* The server type is chosen by comparing three characters at dash with MDT
 * and OST (LDD_F_SV_TYPE_MDT or LDD_F_SV_TYPE_OST). A name that continues
 * with the word all returns the type OR-ed with LDD_F_SV_ALL; otherwise the
 * index is parsed as hexadecimal with simple_strtoul(), which also sets
 * @endptr. When the text one character past *endptr starts with
 * LUSTRE_MDC_NAME, *endptr is advanced by sizeof(LUSTRE_MDC_NAME), i.e.
 * over that one character plus the name.
 * NOTE(review): declarations, the adjustments of dash between the visible
 * steps, the store into @idx and the returns are on lines missing from
 * this listing. */
721 int server_name2index(const char *svname, __u32 *idx, const char **endptr)
727 /* We use server_name2fsname() just for parsing */
728 rc = server_name2fsname(svname, NULL, &dash);
734 if (strncmp(dash, "MDT", 3) == 0)
735 rc = LDD_F_SV_TYPE_MDT;
736 else if (strncmp(dash, "OST", 3) == 0)
737 rc = LDD_F_SV_TYPE_OST;
743 if (strncmp(dash, "all", 3) == 0) {
746 return rc | LDD_F_SV_ALL;
749 index = simple_strtoul(dash, (char **)endptr, 16);
753 /* Account for -mdc after index that is possible when specifying mdt */
754 if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
755 sizeof(LUSTRE_MDC_NAME)-1) == 0)
756 *endptr += sizeof(LUSTRE_MDC_NAME);
760 EXPORT_SYMBOL(server_name2index);
762 /*************** mount common between server and client ***************/
/* Common unmount step: calls lustre_stop_mgc() to drop this superblock's
 * MGC reference. A result that is non-zero and not -ENOENT reaches the
 * branch containing the two messages below.
 * NOTE(review): the test that separates the CERROR case from the
 * still-in-use case, the call that drops the disk reference and the return
 * are on lines missing from this listing. */
765 int lustre_common_put_super(struct super_block *sb)
769 CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
771 /* Drop a ref to the MGC */
772 rc = lustre_stop_mgc(sb);
773 if (rc && (rc != -ENOENT)) {
775 CERROR("Can't stop MGC: %d\n", rc);
778 /* BUSY just means that there's some other obd that
779 needs the mgc. Let him clean it up. */
780 CDEBUG(D_MOUNT, "MGC still in use\n");
782 /* Drop a ref to the mounted disk */
787 EXPORT_SYMBOL(lustre_common_put_super);
/* Dump the parsed mount data through PRINT_CMD at D_MOUNT level: profile
 * (clients only), device, flags, options, the recovery times when they
 * are non-zero, and one line per excluded OST index.
 * NOTE(review): the declaration of i and the condition in front of the
 * options line are not visible in this listing. */
789 static void lmd_print(struct lustre_mount_data *lmd)
793 PRINT_CMD(D_MOUNT, " mount data:\n");
794 if (lmd_is_client(lmd))
795 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
796 PRINT_CMD(D_MOUNT, "device: %s\n", lmd->lmd_dev);
797 PRINT_CMD(D_MOUNT, "flags: %x\n", lmd->lmd_flags);
800 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
802 if (lmd->lmd_recovery_time_soft)
803 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
804 lmd->lmd_recovery_time_soft);
806 if (lmd->lmd_recovery_time_hard)
807 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
808 lmd->lmd_recovery_time_hard);
810 for (i = 0; i < lmd->lmd_exclude_count; i++) {
811 PRINT_CMD(D_MOUNT, "exclude %d: OST%04x\n", i,
812 lmd->lmd_exclude[i]);
816 /* Is this server on the exclusion list */
/* Parses @svname with server_name2index(); only names of type
 * LDD_F_SV_TYPE_OST are compared against the lmd_exclude array of the
 * superblock's mount data, and a match is reported with CWARN.
 * NOTE(review): the declarations and every return statement are missing
 * from this listing, so the exact return values cannot be stated here. */
817 int lustre_check_exclusion(struct super_block *sb, char *svname)
819 struct lustre_sb_info *lsi = s2lsi(sb);
820 struct lustre_mount_data *lmd = lsi->lsi_lmd;
824 rc = server_name2index(svname, &index, NULL);
825 if (rc != LDD_F_SV_TYPE_OST)
826 /* Only exclude OSTs */
829 CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
830 index, lmd->lmd_exclude_count, lmd->lmd_dev);
832 for (i = 0; i < lmd->lmd_exclude_count; i++) {
833 if (index == lmd->lmd_exclude[i]) {
834 CWARN("Excluding %s (on exclusion list)\n", svname);
841 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
/* Builds lmd->lmd_exclude from the exclude list at @ptr. Indices are first
 * collected in a temporary array sized from the string length (devmax
 * entries); names that server_name2index() reports as OSTs are recorded,
 * other types are only logged. The collected indices are then copied into
 * a permanent array of exactly lmd_exclude_count entries, and the count is
 * reset to 0 on the path where that array is not filled.
 * NOTE(review): declarations, the advance of s1 inside the loop, the
 * breaks, the else branches and the return are on lines missing from this
 * listing. */
842 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
844 const char *s1 = ptr, *s2;
845 __u32 index, *exclude_list;
848 /* The shortest an ost name can be is 8 chars: -OST0000.
849 We don't actually know the fsname at this time, so in fact
850 a user could specify any fsname. */
851 devmax = strlen(ptr) / 8 + 1;
853 /* temp storage until we figure out how many we have */
854 OBD_ALLOC(exclude_list, sizeof(index) * devmax);
858 /* we enter this fn pointing at the '=' */
859 while (*s1 && *s1 != ' ' && *s1 != ',') {
861 rc = server_name2index(s1, &index, &s2);
863 CERROR("Can't parse server name '%s': rc = %d\n",
867 if (rc == LDD_F_SV_TYPE_OST)
868 exclude_list[lmd->lmd_exclude_count++] = index;
870 CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
871 (uint)(s2-s1), s1, rc);
873 /* now we are pointing at ':' (next exclude)
874 or ',' (end of excludes) */
875 if (lmd->lmd_exclude_count >= devmax)
878 if (rc >= 0) /* non-err */
881 if (lmd->lmd_exclude_count) {
882 /* permanent, freed in lustre_free_lsi */
883 OBD_ALLOC(lmd->lmd_exclude, sizeof(index) *
884 lmd->lmd_exclude_count);
885 if (lmd->lmd_exclude) {
886 memcpy(lmd->lmd_exclude, exclude_list,
887 sizeof(index) * lmd->lmd_exclude_count);
890 lmd->lmd_exclude_count = 0;
893 OBD_FREE(exclude_list, sizeof(index) * devmax);
/* Store the value of an mgssec= option in lmd->lmd_mgssec. Any previous
 * value is freed first; the new value is copied into a fresh buffer of
 * length + 1 bytes and NUL-terminated. The option text is searched for a
 * comma with strchr().
 * NOTE(review): the declarations, the branch that derives length from
 * tail when a comma is found, and the returns are on lines missing from
 * this listing. */
897 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
902 if (lmd->lmd_mgssec != NULL) {
903 OBD_FREE(lmd->lmd_mgssec, strlen(lmd->lmd_mgssec) + 1);
904 lmd->lmd_mgssec = NULL;
907 tail = strchr(ptr, ',');
909 length = strlen(ptr);
913 OBD_ALLOC(lmd->lmd_mgssec, length + 1);
914 if (lmd->lmd_mgssec == NULL)
917 memcpy(lmd->lmd_mgssec, ptr, length);
918 lmd->lmd_mgssec[length] = '\0';
/* Generic form of the option-value copy: frees the string currently held
 * in *handle (if any), then copies length bytes from @ptr into a newly
 * allocated buffer of length + 1 bytes and NUL-terminates it. Both
 * @handle and @ptr are checked for NULL up front.
 * NOTE(review): the declarations, the reset of *handle after the free,
 * the branch that derives length from tail, the allocation check and the
 * returns are on lines missing from this listing. */
922 static int lmd_parse_string(char **handle, char *ptr)
927 if ((handle == NULL) || (ptr == NULL))
930 if (*handle != NULL) {
931 OBD_FREE(*handle, strlen(*handle) + 1);
935 tail = strchr(ptr, ',');
937 length = strlen(ptr);
941 OBD_ALLOC(*handle, length + 1);
945 memcpy(*handle, ptr, length);
946 (*handle)[length] = '\0';
951 /* Collect multiple values for mgsnid specifiers */
/* Appends the nid list starting at *ptr to lmd->lmd_mgs. The end of the
 * list is found by parsing nids until class_parse_nid_quiet() stops
 * returning 0. When a previous value exists, it is copied first and its
 * terminating NUL is replaced by a colon, so the new list follows after
 * the separator; the old buffer is then freed.
 * NOTE(review): the declarations (including the initial values of tail
 * and oldlen), the zero-length test in front of the console error, the
 * allocation check, the update of *ptr and the returns are on lines
 * missing from this listing. */
952 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
960 /* Find end of nidlist */
961 while (class_parse_nid_quiet(tail, &nid, &tail) == 0) {}
962 length = tail - *ptr;
964 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
968 if (lmd->lmd_mgs != NULL)
969 oldlen = strlen(lmd->lmd_mgs) + 1;
971 OBD_ALLOC(mgsnid, oldlen + length + 1);
975 if (lmd->lmd_mgs != NULL) {
976 /* Multiple mgsnid= are taken to mean failover locations */
977 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
978 mgsnid[oldlen - 1] = ':';
979 OBD_FREE(lmd->lmd_mgs, oldlen);
981 memcpy(mgsnid + oldlen, *ptr, length);
982 mgsnid[oldlen + length] = '\0';
983 lmd->lmd_mgs = mgsnid;
989 /** Parse mount line options
990 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
991 * dev is passed as device=uml1:/lustre by mount.lustre
 *
 * Walks the comma/space separated option string. The keywords compared
 * below set bits in lmd_flags or fill lmd fields (recovery times, MGS
 * nids, mgssec, exclude list, svname, param, osd); device= supplies the
 * device name. A device name containing ":/" marks a client mount and
 * yields the profile <fsname>-client. The device name and the option
 * string are saved in lmd_dev and lmd_opts.
 *
 * NOTE(review): the loop control, the handling that removes consumed
 * options, the error gotos and the returns are on lines missing from this
 * listing.
993 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
995 char *s1, *s2, *devname = NULL;
996 struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
1001 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that "
1002 "/sbin/mount.lustre is installed.\n");
1006 /* Options should be a string - try to detect old lmd data */
1007 if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
1008 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of "
1009 "/sbin/mount.lustre. Please install "
1010 "version %s\n", LUSTRE_VERSION_STRING);
1013 lmd->lmd_magic = LMD_MAGIC;
1015 OBD_ALLOC(lmd->lmd_params, 4096);
1016 if (lmd->lmd_params == NULL)
1018 lmd->lmd_params[0] = '\0';
1020 /* Set default flags here */
1025 int time_min = OBD_RECOVERY_TIME_MIN;
1027 /* Skip whitespace and extra commas */
1028 while (*s1 == ' ' || *s1 == ',')
1031 /* Client options are parsed in ll_options: eg. flock,
1034 /* Parse non-ldiskfs options here. Rather than modifying
1035 ldiskfs, we just zero these out here */
1036 if (strncmp(s1, "abort_recov", 11) == 0) {
1037 lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
1039 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
1040 lmd->lmd_recovery_time_soft = max_t(int,
1041 simple_strtoul(s1 + 19, NULL, 10), time_min);
1043 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
1044 lmd->lmd_recovery_time_hard = max_t(int,
1045 simple_strtoul(s1 + 19, NULL, 10), time_min);
1047 } else if (strncmp(s1, "noir", 4) == 0) {
1048 lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
1050 } else if (strncmp(s1, "nosvc", 5) == 0) {
1051 lmd->lmd_flags |= LMD_FLG_NOSVC;
1053 } else if (strncmp(s1, "nomgs", 5) == 0) {
1054 lmd->lmd_flags |= LMD_FLG_NOMGS;
1056 } else if (strncmp(s1, "noscrub", 7) == 0) {
1057 lmd->lmd_flags |= LMD_FLG_NOSCRUB;
1059 } else if (strncmp(s1, PARAM_MGSNODE,
1060 sizeof(PARAM_MGSNODE) - 1) == 0) {
1061 s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
1062 /* Assume the next mount opt is the first
1063 invalid nid we get to. */
1064 rc = lmd_parse_mgs(lmd, &s2);
1068 } else if (strncmp(s1, "writeconf", 9) == 0) {
1069 lmd->lmd_flags |= LMD_FLG_WRITECONF;
1071 } else if (strncmp(s1, "update", 6) == 0) {
1072 lmd->lmd_flags |= LMD_FLG_UPDATE;
1074 } else if (strncmp(s1, "virgin", 6) == 0) {
1075 lmd->lmd_flags |= LMD_FLG_VIRGIN;
1077 } else if (strncmp(s1, "noprimnode", 10) == 0) {
1078 lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
1080 } else if (strncmp(s1, "mgssec=", 7) == 0) {
1081 rc = lmd_parse_mgssec(lmd, s1 + 7);
1085 /* ost exclusion list */
1086 } else if (strncmp(s1, "exclude=", 8) == 0) {
/* s1 + 7 leaves the pointer on the equals sign of the 8-character keyword;
 * lmd_make_exclusion() documents that it is entered pointing there. */
1087 rc = lmd_make_exclusion(lmd, s1 + 7);
/* Three-character prefix match: this branch is reached only after the
 * mgssec= comparison earlier in the chain has failed. */
1091 } else if (strncmp(s1, "mgs", 3) == 0) {
1093 lmd->lmd_flags |= LMD_FLG_MGS;
1095 } else if (strncmp(s1, "svname=", 7) == 0) {
1096 rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
1100 } else if (strncmp(s1, "param=", 6) == 0) {
1102 char *tail = strchr(s1 + 6, ',');
1104 length = strlen(s1);
/* NOTE(review): lmd_params is the 4096-byte buffer allocated above; no
 * check of length against the space still free in it is visible before
 * this strncat/strcat pair - confirm one exists on the lines not shown. */
1108 strncat(lmd->lmd_params, s1 + 6, length);
1109 strcat(lmd->lmd_params, " ");
1111 } else if (strncmp(s1, "osd=", 4) == 0) {
1112 rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1117 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1118 end of the options. */
1119 else if (strncmp(s1, "device=", 7) == 0) {
1121 /* terminate options right before device. device
1122 must be the last one. */
1128 s2 = strchr(s1, ',');
1136 memmove(s1, s2, strlen(s2) + 1);
1142 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name "
1143 "(need mount option 'device=...')\n");
1147 s1 = strstr(devname, ":/");
1150 lmd->lmd_flags |= LMD_FLG_CLIENT;
1151 /* Remove leading /s from fsname */
1152 while (*++s1 == '/') ;
1153 /* Freed in lustre_free_lsi */
1154 OBD_ALLOC(lmd->lmd_profile, strlen(s1) + 8);
1155 if (!lmd->lmd_profile)
1157 sprintf(lmd->lmd_profile, "%s-client", s1);
1160 /* Freed in lustre_free_lsi */
1161 OBD_ALLOC(lmd->lmd_dev, strlen(devname) + 1);
1164 strcpy(lmd->lmd_dev, devname);
1166 /* Save mount options */
1167 s1 = options + strlen(options) - 1;
1168 while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1170 if (*options != 0) {
1171 /* Freed in lustre_free_lsi */
1172 OBD_ALLOC(lmd->lmd_opts, strlen(options) + 1);
1175 strcpy(lmd->lmd_opts, options);
1179 lmd->lmd_magic = LMD_MAGIC;
1184 CERROR("Bad mount options %s\n", options);
/* Bundle that lustre_mount() passes to lustre_fill_super() as its data
 * argument: lmd2_data is handed to lmd_parse() as the option string and
 * lmd2_mnt is forwarded to the client_fill_super hook.
 * NOTE(review): the lmd2_data member line is missing from this listing. */
1188 struct lustre_mount_data2 {
1190 struct vfsmount *lmd2_mnt;
1193 /** This is the entry point for the mount call into Lustre.
1194 * This is called when a server or client is mounted,
1195 * and this is where we start setting things up.
1196 * @param data Mount options (e.g. -o flock,abort_recov)
 *
 * Creates the lsi with lustre_init_lsi(), waits on obd_zombie_barrier(),
 * and parses the mount options with lmd_parse(). For a client mount it
 * starts the MGC and calls the registered client_fill_super hook,
 * requesting the lustre module first when no hook is registered. Any
 * other mount is rejected with the client-side-only error message.
 *
 * NOTE(review): the assignment of lmd, the error gotos, the cleanup calls
 * and the return are on lines missing from this listing.
1198 int lustre_fill_super(struct super_block *sb, void *data, int silent)
1200 struct lustre_mount_data *lmd;
1201 struct lustre_mount_data2 *lmd2 = data;
1202 struct lustre_sb_info *lsi;
1205 CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1207 lsi = lustre_init_lsi(sb);
1213 * Disable lockdep during mount, because mount locking patterns are
1219 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1221 obd_zombie_barrier();
1223 /* Figure out the lmd from the mount options */
1224 if (lmd_parse((char *)(lmd2->lmd2_data), lmd)) {
1230 if (lmd_is_client(lmd)) {
1231 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1232 if (client_fill_super == NULL)
1233 request_module("lustre");
1234 if (client_fill_super == NULL) {
1235 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for "
1236 "client mount! Is the 'lustre' "
1237 "module loaded?\n");
1241 rc = lustre_start_mgc(sb);
1246 /* Connect and start */
1247 /* (should always be ll_fill_super) */
1248 rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1249 /* c_f_s will call lustre_common_put_super on failure */
1252 CERROR("This is client-side-only module, "
1253 "cannot handle server mount.\n");
1257 /* If error happens in fill_super() call, @lsi will be killed there.
1258 * This is why we do not put it here. */
1262 CERROR("Unable to mount %s (%d)\n",
1263 s2lsi(sb) ? lmd->lmd_dev : "", rc);
1265 CDEBUG(D_SUPER, "Mount %s complete\n",
1273 /* We can't call ll_fill_super by name because it lives in a module that
1274 must be loaded after this one. */
/* Stores @cfs in the file-scope client_fill_super pointer, which
 * lustre_fill_super() calls for client mounts. */
1275 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1276 struct vfsmount *mnt))
1278 client_fill_super = cfs;
1280 EXPORT_SYMBOL(lustre_register_client_fill_super);
/* Stores @cfs in the file-scope kill_super_cb pointer, which
 * lustre_kill_super() calls for superblocks that are not servers. */
1282 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1284 kill_super_cb = cfs;
1286 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1288 /***************** FS registration ******************/
/* Mount entry point of lustre_fs_type: wraps the mount data in an on-stack
 * struct lustre_mount_data2 and calls mount_nodev() with
 * lustre_fill_super() as the fill callback.
 * NOTE(review): the initialiser lines of lmd2 are missing from this
 * listing. */
1289 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1290 const char *devname, void *data)
1292 struct lustre_mount_data2 lmd2 = {
1297 return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
/* kill_sb handler of lustre_fs_type: when a kill_super_cb is registered
 * and the superblock has an lsi that is not a server, the callback runs
 * first; kill_anon_super() is then called on @sb. */
1300 void lustre_kill_super(struct super_block *sb)
1302 struct lustre_sb_info *lsi = s2lsi(sb);
1304 if (kill_super_cb && lsi && !IS_SERVER(lsi))
1305 (*kill_super_cb)(sb);
1307 kill_anon_super(sb);
1310 /** Register the "lustre" fs type
 *
 * Mounts go through lustre_mount() and teardown through
 * lustre_kill_super(). MODULE_ALIAS_FS below declares the lustre alias.
 * NOTE(review): the .name initialiser line is missing from this listing.
1312 struct file_system_type lustre_fs_type = {
1313 .owner = THIS_MODULE,
1315 .mount = lustre_mount,
1316 .kill_sb = lustre_kill_super,
1317 .fs_flags = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1318 FS_HAS_FIEMAP | FS_RENAME_DOES_D_MOVE,
1320 MODULE_ALIAS_FS("lustre");
// Registers lustre_fs_type with register_filesystem() and returns its
// result unchanged.
1322 int lustre_register_fs(void)
1324 return register_filesystem(&lustre_fs_type);
// Unregisters lustre_fs_type with unregister_filesystem() and returns its
// result unchanged.
1327 int lustre_unregister_fs(void)
1329 return unregister_filesystem(&lustre_fs_type);