Wednesday, August 27, 2014

Getting error when adding disk to existing diskgroup (VxVM vxdg ERROR V-5-1-10128 Configuration daemon error 441)

My actual task was to increase the size of a file system that sits on a VxVM mirrored volume. When I tried to add disks to the existing disk group, I ran into this issue.

The issue was caused by a mismatch in the disk group (DG) configuration; this post may help you if you are in the same situation.

Current status of FS / Volume:
[root@ivmprd ~]# df -h /opt/nbuback
Filesystem             size   used  avail capacity  Mounted on
/dev/vx/dsk/NDG/LVOL01
                       1.2T   1.1T    64G    95%    /opt/nbuback
[root@ivmprd ~]# vxprint -htg NDG
DG NAME  NCONFIG      NLOG     MINORS   GROUP-ID
ST NAME  STATE        DM_CNT   SPARE_CNT         APPVOL_CNT
DM NAME  DEVICE       TYPE     PRIVLEN  PUBLEN   STATE
RV NAME  RLINK_CNT    KSTATE   STATE    PRIMARY  DATAVOLS  SRL
RL NAME  RVG          KSTATE   STATE    REM_HOST REM_DG    REM_RLNK
CO NAME  CACHEVOL     KSTATE   STATE
VT NAME  RVG          KSTATE   STATE    NVOLUME
V  NAME  RVG/VSET/CO  KSTATE   STATE    LENGTH   READPOL   PREFPLEX UTYPE
PL NAME  VOLUME       KSTATE   STATE    LENGTH   LAYOUT    NCOL/WID MODE
SD NAME  PLEX         DISK     DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
SV NAME  PLEX         VOLNAME  NVOLLAYR LENGTH   [COL/]OFF AM/NM    MODE
SC NAME  PLEX         CACHE    DISKOFFS LENGTH   [COL/]OFF DEVICE   MODE
DC NAME  PARENTVOL    LOGVOL
SP NAME  SNAPVOL      DCO
EX NAME  ASSOC        VC                       PERMS    MODE     STATE
SR NAME  KSTATE

dg NDG      default      default  20000    1201549779.11.ivmprd

dm USDC_01 store1-usp1_01a1 auto 65536 870216448 -
dm USDC_02 store1-usp1_01a2 auto 65536 870216448 -
dm USDC_03 store1-usp1_01a3 auto 65536 419346688 -
dm USDC_04 store1-usp1_01a4 auto 65536 419346688 -
dm CADC_01 store2-usp2_02b1 auto 65536 870216448 -
dm CADC_02 store2-usp2_02b2 auto 65536 870216448 -
dm CADC_03 store2-usp2_02b3 auto 65536 419346688 -
dm CADC_04 store2-usp2_02b4 auto 65536 419346688 -

v  LVOL01  -         ENABLED  ACTIVE   2537300320 SELECT  -        fsgen
pl LVOL01-03 LVOL01  ENABLED  ACTIVE   2537300320 CONCAT  -        RW
sd USDC_01-01 LVOL01-03 USDC_01 0 870216448 0    store1-usp1_01a1 ENA
sd USDC_02-01 LVOL01-03 USDC_02 0 828223072 870216448 store1-usp1_01a2 ENA
sd USDC_03-01 LVOL01-03 USDC_03 0 419346688 1698439520 store1-usp1_01a3 ENA
sd USDC_02-02 LVOL01-03 USDC_02 828223072 83712 2117786208 store1-usp1_01a2 ENA
sd USDC_03-02 LVOL01-03 USDC_04 0 419346688 2117869920 store1-usp1_01a4 ENA
sd USDC_02-03 LVOL01-03 USDC_02 828306784 83712 2537216608 store1-usp1_01a2 ENA
pl LVOL01-04    LVOL01       ENABLED  ACTIVE   2537300320 CONCAT  -        RW
sd CADC_01-01 LVOL01-04 CADC_01 0  870216448 0        store2-usp2_02b1 ENA
sd CADC_02-01 LVOL01-04 CADC_02 0  828223072 870216448 store2-usp2_02b2 ENA
sd CADC_03-01 LVOL01-04 CADC_03 0  419346688 1698439520 store2-usp2_02b3 ENA
sd CADC_02-02 LVOL01-04 CADC_02 828223072 83712 2117786208 store2-usp2_02b2 ENA
sd CADC_03-02 LVOL01-04 CADC_04 0  419346688 2117869920 store2-usp2_02b4 ENA
sd CADC_02-03 LVOL01-04 CADC_02 828306784 83712 2537216608 store2-usp2_02b2 ENA
[root@ivmprd ~]# vxdisk -g NDG list
DEVICE       TYPE            DISK         GROUP        STATUS
store1-usp1_01a3 auto:cdsdisk    USDC_03  NDG        online
store1-usp1_01a1 auto:cdsdisk    USDC_01  NDG        online
store1-usp1_01a2 auto:cdsdisk    USDC_02  NDG        online
store1-usp1_01a4 auto:cdsdisk    USDC_04  NDG        online
store2-usp2_02b1 auto:cdsdisk    CADC_01  NDG        online
store2-usp2_02b2 auto:cdsdisk    CADC_02  NDG        online
store2-usp2_02b3 auto:cdsdisk    CADC_03  NDG        online
store2-usp2_02b4 auto:cdsdisk    CADC_04  NDG        online
[root@ivmprd ~]#
Now I am planning to increase the file system from 1.2 TB to 1.4 TB.
Scan the disk:
[root@ivmprd ~]# devfsadm
[root@ivmprd ~]# echo |format
Searching for disks...done

c2t50060E8006CF9F35d33: configured with capacity of 199.99GB
c2t50060E8006CFE877d32: configured with capacity of 199.99GB
c4t50060E8006CF9F25d33: configured with capacity of 199.99GB
c4t50060E8006CFE867d32: configured with capacity of 199.99GB
We are able to see the disks from the OS. Now scan for the disks at the VxVM level:
[root@ivmprd ~]# vxdctl enable
[root@ivmprd ~]# vxdisk list
DEVICE       TYPE            DISK         GROUP        STATUS
disk_0       auto:none       -            -            online invalid
disk_1       auto:none       -            -            online invalid
store1-usp1_01a3 auto:cdsdisk    USDC_03  NDG        online
store1-usp1_01a1 auto:cdsdisk    USDC_01  NDG        online
store1-usp1_01a2 auto:cdsdisk    USDC_02  NDG        online
store1-usp1_01a5 auto            -            -            nolabel
store1-usp1_01a4 auto:cdsdisk    USDC_04  NDG        online
store2-usp2_02b1 auto:cdsdisk    CADC_01  NDG        online
store2-usp2_02b2 auto:cdsdisk    CADC_02  NDG        online
store2-usp2_02b3 auto:cdsdisk    CADC_03  NDG        online
store2-usp2_02b5 auto            -            -            nolabel
store2-usp2_02b4 auto:cdsdisk    CADC_04  NDG        online
[root@ivmprd ~]#
Set up the disks under VxVM control:
[root@ivmprd ~]# vxdisksetup -i store1-usp1_01a5
prtvtoc: /dev/vx/rdmp/store1-usp1_01a5: Unable to read Disk geometry errno = 0x6
[root@ivmprd ~]# vxdisksetup -i store2-usp2_02b5
prtvtoc: /dev/vx/rdmp/store2-usp2_02b5: Unable to read Disk geometry errno = 0x6
[root@ivmprd ~]#
We got the error above because the disks are not labeled. Label the disks using the format command. Because multipathing is enabled in my environment, each disk is visible through more than one path; labeling one path per disk should be enough, but I labeled all four paths.
Label the disk using format command:
[root@whms7039 /]# format c2t50060E8006CF9F35d33

c2t50060E8006CF9F35d33: configured with capacity of 199.99GB
selecting c2t50060E8006CF9F35d33
[disk formatted]

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> p

PARTITION MENU:
        0      - change `0' partition
        1      - change `1' partition
        2      - change `2' partition
        3      - change `3' partition
        4      - change `4' partition
        5      - change `5' partition
        6      - change `6' partition
        7      - change `7' partition
        select - select a predefined table
        modify - modify a predefined partition table
        name   - name the current table
        print  - display the current table
        label  - write partition map and label to the disk
        ! - execute , then return
        quit
partition> p
Current partition table (default):
Total disk cylinders available: 54611 + 2 (reserved cylinders)

Part      Tag    Flag     Cylinders         Size            Blocks
  0       root    wm    0 -    34      131.25MB    (35/0/0)       268800
  1       swap    wu   35 -    69      131.25MB    (35/0/0)       268800
  2     backup    wu    0 - 54610      199.99GB    (54611/0/0) 419412480
  3 unassigned    wm    0                0         (0/0/0)             0
  4 unassigned    wm    0                0         (0/0/0)             0
  5 unassigned    wm    0                0         (0/0/0)             0
  6        usr    wm   70 - 54610      199.74GB    (54541/0/0) 418874880
  7 unassigned    wm    0                0         (0/0/0)             0

partition> l
Ready to label disk, continue? y

partition> q

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> q
[root@whms7039 /]# format c4t50060E8006CF9F25d33
selecting c4t50060E8006CF9F25d33
[disk formatted]

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> p

PARTITION MENU:
        0      - change `0' partition
        1      - change `1' partition
        2      - change `2' partition
        3      - change `3' partition
        4      - change `4' partition
        5      - change `5' partition
        6      - change `6' partition
        7      - change `7' partition
        select - select a predefined table
        modify - modify a predefined partition table
        name   - name the current table
        print  - display the current table
        label  - write partition map and label to the disk
        ! - execute , then return
        quit
partition> p
Current partition table (original):
Total disk cylinders available: 54611 + 2 (reserved cylinders)

Part      Tag    Flag     Cylinders         Size            Blocks
  0       root    wm    0 -    34      131.25MB    (35/0/0)       268800
  1       swap    wu   35 -    69      131.25MB    (35/0/0)       268800
  2     backup    wu    0 - 54610      199.99GB    (54611/0/0) 419412480
  3 unassigned    wm    0                0         (0/0/0)             0
  4 unassigned    wm    0                0         (0/0/0)             0
  5 unassigned    wm    0                0         (0/0/0)             0
  6        usr    wm   70 - 54610      199.74GB    (54541/0/0) 418874880
  7 unassigned    wm    0                0         (0/0/0)             0

partition> l
Ready to label disk, continue? y

partition> q

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> q

[root@whms7039 /]# format c2t50060E8006CFE877d32

c2t50060E8006CFE877d32: configured with capacity of 199.99GB
selecting c2t50060E8006CFE877d32
[disk formatted]

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> p

PARTITION MENU:
        0      - change `0' partition
        1      - change `1' partition
        2      - change `2' partition
        3      - change `3' partition
        4      - change `4' partition
        5      - change `5' partition
        6      - change `6' partition
        7      - change `7' partition
        select - select a predefined table
        modify - modify a predefined partition table
        name   - name the current table
        print  - display the current table
        label  - write partition map and label to the disk
        ! - execute , then return
        quit
partition> p
Current partition table (default):
Total disk cylinders available: 54611 + 2 (reserved cylinders)

Part      Tag    Flag     Cylinders         Size            Blocks
  0       root    wm    0 -    34      131.25MB    (35/0/0)       268800
  1       swap    wu   35 -    69      131.25MB    (35/0/0)       268800
  2     backup    wu    0 - 54610      199.99GB    (54611/0/0) 419412480
  3 unassigned    wm    0                0         (0/0/0)             0
  4 unassigned    wm    0                0         (0/0/0)             0
  5 unassigned    wm    0                0         (0/0/0)             0
  6        usr    wm   70 - 54610      199.74GB    (54541/0/0) 418874880
  7 unassigned    wm    0                0         (0/0/0)             0

partition> l
Ready to label disk, continue? y

partition> q

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> q
[root@whms7039 /]# format c4t50060E8006CFE867d32
selecting c4t50060E8006CFE867d32
[disk formatted]

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> p

PARTITION MENU:
        0      - change `0' partition
        1      - change `1' partition
        2      - change `2' partition
        3      - change `3' partition
        4      - change `4' partition
        5      - change `5' partition
        6      - change `6' partition
        7      - change `7' partition
        select - select a predefined table
        modify - modify a predefined partition table
        name   - name the current table
        print  - display the current table
        label  - write partition map and label to the disk
        ! - execute , then return
        quit
partition> p
Current partition table (original):
Total disk cylinders available: 54611 + 2 (reserved cylinders)

Part      Tag    Flag     Cylinders         Size            Blocks
  0       root    wm    0 -    34      131.25MB    (35/0/0)       268800
  1       swap    wu   35 -    69      131.25MB    (35/0/0)       268800
  2     backup    wu    0 - 54610      199.99GB    (54611/0/0) 419412480
  3 unassigned    wm    0                0         (0/0/0)             0
  4 unassigned    wm    0                0         (0/0/0)             0
  5 unassigned    wm    0                0         (0/0/0)             0
  6        usr    wm   70 - 54610      199.74GB    (54541/0/0) 418874880
  7 unassigned    wm    0                0         (0/0/0)             0

partition> l
Ready to label disk, continue? y

partition> q

FORMAT MENU:
        disk       - select a disk
        type       - select (define) a disk type
        partition  - select (define) a partition table
        current    - describe the current disk
        format     - format and analyze the disk
        repair     - repair a defective sector
        label      - write label to the disk
        analyze    - surface analysis
        defect     - defect list management
        backup     - search for backup labels
        verify     - read and display labels
        save       - save new disk/partition definitions
        inquiry    - show vendor, product and revision
        volname    - set 8-character volume name
        !     - execute , then return
        quit
format> q
[root@whms7039 /]#
Re-read the VxVM configuration to pick up the changes:
[root@ivmprd ~]# vxdctl enable
[root@ivmprd ~]# vxdisk list
DEVICE       TYPE            DISK         GROUP        STATUS
disk_0       auto:none       -            -            online invalid
disk_1       auto:none       -            -            online invalid
store1-usp1_01a3 auto:cdsdisk    USDC_03  NDG        online
store1-usp1_01a1 auto:cdsdisk    USDC_01  NDG        online
store1-usp1_01a2 auto:cdsdisk    USDC_02  NDG        online
store1-usp1_01a5 auto:none       -            -            online invalid
store1-usp1_01a4 auto:cdsdisk    USDC_04  NDG        online
store2-usp2_02b1 auto:cdsdisk    CADC_01  NDG        online
store2-usp2_02b2 auto:cdsdisk    CADC_02  NDG        online
store2-usp2_02b3 auto:cdsdisk    CADC_03  NDG        online
store2-usp2_02b5 auto:none       -            -            online invalid
store2-usp2_02b4 auto:cdsdisk    CADC_04  NDG        online
[root@ivmprd ~]# vxdisksetup -i store1-usp1_01a5
[root@ivmprd ~]# vxdisksetup -i store2-usp2_02b5
[root@ivmprd ~]# vxdisk list
DEVICE       TYPE            DISK         GROUP        STATUS
disk_0       auto:none       -            -            online invalid
disk_1       auto:none       -            -            online invalid
store1-usp1_01a3 auto:cdsdisk    USDC_03  NDG        online
store1-usp1_01a1 auto:cdsdisk    USDC_01  NDG        online
store1-usp1_01a2 auto:cdsdisk    USDC_02  NDG        online
store1-usp1_01a5 auto:cdsdisk    -            -            online
store1-usp1_01a4 auto:cdsdisk    USDC_04  NDG        online
store2-usp2_02b1 auto:cdsdisk    CADC_01  NDG        online
store2-usp2_02b2 auto:cdsdisk    CADC_02  NDG        online
store2-usp2_02b3 auto:cdsdisk    CADC_03  NDG        online
store2-usp2_02b5 auto:cdsdisk    -            -            online
store2-usp2_02b4 auto:cdsdisk    CADC_04  NDG        online
[root@ivmprd ~]#
Set the site tag for those disks, since the LUNs come from different data centers (DCs):
[root@ivmprd ~]# vxdisk settag site=USDC store1-usp1_01a5
[root@ivmprd ~]# vxdisk settag site=CADC store2-usp2_02b5
[root@ivmprd ~]#
Add the disks to existing DG (NDG):
[root@ivmprd ~]# vxdg -g NDG adddisk USDC_05=store1-usp1_01a5
VxVM vxdg ERROR V-5-1-10128  Configuration daemon error 441
[root@ivmprd ~]#
We got an error above; to resolve this issue we need to flush the DG.
FLUSH the DG and add the disk to existing DG (NDG):
[root@ivmprd ~]# vxdg flush NDG
[root@ivmprd ~]# vxdg -g NDG adddisk USDC_05=store1-usp1_01a5
[root@ivmprd ~]# vxdg -g NDG adddisk CADC_05=store2-usp2_02b5
[root@ivmprd ~]#
[root@ivmprd ~]# vxdisk -g NDG list
DEVICE       TYPE            DISK         GROUP        STATUS
store1-usp1_01a3 auto:cdsdisk    USDC_03  NDG        online
store1-usp1_01a1 auto:cdsdisk    USDC_01  NDG        online
store1-usp1_01a2 auto:cdsdisk    USDC_02  NDG        online
store1-usp1_01a5 auto:cdsdisk    USDC_05  NDG        online
store1-usp1_01a4 auto:cdsdisk    USDC_04  NDG        online
store2-usp2_02b1 auto:cdsdisk    CADC_01  NDG        online
store2-usp2_02b2 auto:cdsdisk    CADC_02  NDG        online
store2-usp2_02b3 auto:cdsdisk    CADC_03  NDG        online
store2-usp2_02b5 auto:cdsdisk    CADC_05  NDG        online
store2-usp2_02b4 auto:cdsdisk    CADC_04  NDG        online
[root@ivmprd ~]#
Cool... After the flush we are able to add those disks to the existing DG (NDG). Now check the available size:
[root@ivmprd ~]# vxassist -g NDG maxsize
Maximum volume size: 922343424 (450363Mb)
[root@ivmprd ~]# vxassist -g NDG maxgrow LVOL01 layout=mirror
Volume LVOL01 can be extended by 461170688 to: 2998471008 (1464097Mb+352 sectors)
[root@ivmprd ~]#
Grow the volume size and check the status:
[root@ivmprd ~]# vxresize -g NDG LVOL01 +200G
[root@ivmprd ~]# vxtask list
TASKID  PTID TYPE/STATE    PCT   PROGRESS
   188         RDWRBACK/R 06.87% 2537300320/2956730720/2566121824 RESYNC LVOL01 NDG
[root@ivmprd ~]#
Once the resync has completed successfully, check the status:
[root@whms7039 /]# df -h /opt/nbuback
Filesystem             size   used  avail capacity  Mounted on
/dev/vx/dsk/NDG/LVOL01
                       1.4T   1.1T   256G    82%    /opt/nbuback
[root@ivmprd ~]#
Thanks for reading this post...

For more information you can refer to the Symantec site. Thanks to Symantec for providing this document.

http://www.symantec.com/business/support/index?page=content&id=TECH130463


Thursday, August 14, 2014

How to replace a faulty primary mirrored root disk in Solaris with an SVM setup on the fly

Here I explain how to replace a faulty primary mirrored root disk on the fly in Solaris with an SVM setup.

We may face a root disk failure in some cases, whether due to a hardware failure or a software error. If the problem is at the hardware level, such as disk hard errors, there is no way to rectify it other than replacing the faulty disk.

In my case the running boot disk reported hard errors, so I contacted the vendor to confirm the disk failure and arrange a replacement.

Nowadays, in most cases the Sun/Oracle vendor will not send a field engineer (FE); instead they ship the parts to our data center (DC) and we have to arrange the replacement of the faulty parts ourselves. This is called the Customer Replaceable Unit (CRU) replacement policy.

My case was also a CRU replacement, so I performed the procedure below to successfully replace the primary mirrored root disk without rebooting the server.

Technical steps:
1- Identify the faulty disk
2- Completely unconfigure and remove the disk from SVM control, using metadetach, metaclear, and metadb
3- Completely unconfigure the faulty device from the o/s using cfgadm
4- Configure the New/ replaced device from the o/s using cfgadm
5- Reconfigure the disk into SVM using prtvtoc, metadb, metainit, and metattach

Check and identify the status of disks:
Note: compare several of the outputs below to make sure the disk really is faulty.
root@ivmprod /$ echo|format |head
Searching for disks...done

AVAILABLE DISK SELECTIONS:
   0. c1t0d0 (drive type unknown)
      /pci@1c,600000/scsi@2/sd@0,0
   1. c1t1d0 (SEAGATE-ST557703LSUN36G-0307 cyl 24620 alt 2 hd 27 sec 107)
      /pci@1c,600000/scsi@2/sd@1,0
   2. c1t2d0 (SUN36G cyl 24620 alt 2 hd 27 sec 107)
      /pci@1c,600000/scsi@2/sd@2,0
root@ivmprod /$

root@ivmprod /$ iostat -En
c1t0d0          Soft Errors: 881 Hard Errors: 205 Transport Errors: 144
Vendor: SEAGATE  Product: ST557704LSUN36G  Revision: 0307 Serial No: 7378
Size: 36.42GB (36418595328 bytes)
Media Error: 168 Device Not Ready: 0 No Device: 2 Recoverable: 289
Illegal Request: 592 Predictive Failure Analysis: 0
c1t1d0          Soft Errors: 592 Hard Errors: 6 Transport Errors: 0
Vendor: SEAGATE  Product: ST557703LSUN36G  Revision: 0707 Serial No: 7445
Size: 36.42GB (36418595328 bytes)
Media Error: 0 Device Not Ready: 0 No Device: 0 Recoverable: 0
Illegal Request: 592 Predictive Failure Analysis: 0
root@ivmprod /$
Check which disk the OS is currently booted from (optional):
root@ivmprod /$ prtconf -pv |grep -i bootpath
        bootpath:  '/pci@1c,600000/scsi@2/disk@0,0:a'
root@ivmprod /$ 
Collect disk controller information:
root@ivmprod /$ cfgadm -al
Ap_Id                     Type         Receptacle   Occupant     Condition
c1                        scsi-bus     connected    configured   unknown
c1::dsk/c1t0d0            disk         connected    configured   unknown
c1::dsk/c1t1d0            disk         connected    configured   unknown
c1::dsk/c1t2d0            disk         connected    configured   unknown
c2                        scsi-bus     connected    unconfigured unknown
c3                        fc-fabric    connected    configured   unknown
c3::50060e8005437940      disk         connected    configured   failing
c3::50060e8005afca40      disk         connected    configured   failing
c4                        fc-fabric    connected    configured   unknown
c4::50060e8005481140      disk         connected    configured   unknown
c4::50060e8005bb9240      disk         connected    configured   unknown
c5                        fc-fabric    connected    configured   unknown
c5::50060e8005437950      disk         connected    configured   failing
c5::50060e8005afca50      disk         connected    configured   failing
c6                        fc-fabric    connected    configured   unknown
c6::50060e8005481150      disk         connected    configured   unknown
c6::50060e8005bb9250      disk         connected    configured   unknown
root@ivmprod /$
Check metadevice status:
root@ivmprod /$ metastat -t |grep Maintenance
   c1t0d0s0    0  No     Maintenance             Mon Jun 23 20:08:18 2014
   c1t0d0s1    0  No     Maintenance             Wed Jun 25 11:00:10 2014
root@ivmprod /$

root@ivmprod /$ metastat | grep State
 State: Needs maintenance
      State: Okay
    State: Needs maintenance
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
      State: Needs maintenance
      State: Okay
    State: Needs maintenance
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
      State: Okay
      State: Okay
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
root@ivmprod /$
Check metadb status:
root@ivmprod /$ metadb -i
        flags           first blk       block count
      Wm  p  l          16              1034            /dev/dsk/c1t0d0s7
      W   p  l          1050            1034            /dev/dsk/c1t0d0s7
      W   p  l          2084            1034            /dev/dsk/c1t0d0s7
     a    p  luo        16              1034            /dev/dsk/c1t1d0s7
     a    p  luo        1050            1034            /dev/dsk/c1t1d0s7
     a    p  luo        2084            1034            /dev/dsk/c1t1d0s7
o - replica active prior to last mddb configuration change
u - replica is up to date
l - locator for this replica was read successfully
c - replica's location was in /etc/lvm/mddb.cf
p - replica's location was patched in kernel
m - replica is master, this is replica selected as input
W - replica has device write errors
a - replica is active, commits are occurring to this replica
M - replica had problem with master blocks
D - replica had problem with data blocks
F - replica had format problems
S - replica is too small to hold current data base
R - replica had device read errors
root@ivmprod /$
Collect the metadevice information and identify which metadevices we have to detach from the mirrors:
root@ivmprod /$ metastat -p
d10 -m d11 d12 1
d11 1 1 c1t0d0s0
d12 1 1 c1t1d0s0
d20 -m d21 d22 1
d21 1 1 c1t0d0s1
d22 1 1 c1t1d0s1
d30 -m d31 d32 1
d31 1 1 c1t0d0s6
d32 1 1 c1t1d0s6
root@ivmprod /$
Detach failed disk from mirror / SVM:
root@ivmprod /$ metadetach d10 d11
metadetach: ivmprod: d10: attempt an operation on a submirror that has erred components
root@ivmprod /$
We got an error above, so detach forcefully using the -f option:
root@ivmprod /$ metadetach -f d10 d11
d10: submirror d11 is detached
root@ivmprod /$ metadetach -f d20 d21
d20: submirror d21 is detached
root@ivmprod /$ metadetach d30 d31
d30: submirror d31 is detached
root@ivmprod /$
Check the status of detached disks:
root@ivmprod /$ metastat -p
d10 -m d12 1
d12 1 1 c1t1d0s0
d20 -m d22 1
d22 1 1 c1t1d0s1
d30 -m d32 1
d32 1 1 c1t1d0s6
d11 1 1 c1t0d0s0
d21 1 1 c1t0d0s1
d31 1 1 c1t0d0s6
root@ivmprod /$
Delete faulted metadb/replica devices:
root@ivmprod /$ metadb -d /dev/dsk/c1t0d0s7
root@ivmprod /$ metadb -i
        flags           first blk       block count
     a    p  luo        16              1034            /dev/dsk/c1t1d0s7
     a    p  luo        1050            1034            /dev/dsk/c1t1d0s7
     a    p  luo        2084            1034            /dev/dsk/c1t1d0s7
o - replica active prior to last mddb configuration change
u - replica is up to date
l - locator for this replica was read successfully
c - replica's location was in /etc/lvm/mddb.cf
p - replica's location was patched in kernel
m - replica is master, this is replica selected as input
W - replica has device write errors
a - replica is active, commits are occurring to this replica
M - replica had problem with master blocks
D - replica had problem with data blocks
F - replica had format problems
S - replica is too small to hold current data base
R - replica had device read errors
root@ivmprod /$
Clear detached metadevices and check the status:
root@ivmprod /$ metaclear d11 d21 d31
d11: Concat/Stripe is cleared
d21: Concat/Stripe is cleared
d31: Concat/Stripe is cleared
root@ivmprod /$
root@ivmprod /$ metastat -p
d10 -m d12 1
d12 1 1 c1t1d0s0
d20 -m d22 1
d22 1 1 c1t1d0s1
d30 -m d32 1
d32 1 1 c1t1d0s6
root@ivmprod /$
Unconfigure faulty disk from OS :
root@ivmprod /$ cfgadm -c unconfigure c1::dsk/c1t0d0
cfgadm: Component system is busy, try again: failed to offline: /devices/pci@1c,600000/scsi@2/sd@0,0
     Resource             Information
------------------  -----------------------
/dev/dsk/c1t0d0s1   dump device (dedicated)
root@ivmprod /$
We got the error above because the dump device is configured on a dedicated slice of the faulty disk (c1t0d0s1), which is the swap slice.
Identify dump device:
root@ivmprod /$ dumpadm
      Dump content: kernel pages
       Dump device: /dev/dsk/c1t0d0s1 (dedicated)
Savecore directory: /var/crash/ivmprod
  Savecore enabled: yes
root@ivmprod /$
Identify SWAP MD device:
root@ivmprod /$ swap -l
swapfile             dev  swaplo blocks   free
/dev/md/dsk/d20     85,20     16 16779296 16757472
root@ivmprod /$
Re-configure DUMP device with MD device name:
root@ivmprod /$ dumpadm -d /dev/md/dsk/d20
      Dump content: kernel pages
       Dump device: /dev/md/dsk/d20 (swap)
Savecore directory: /var/crash/ivmprod
  Savecore enabled: yes
root@ivmprod /$
Unconfigure faulty disk from OS again :
root@ivmprod /$ cfgadm -c unconfigure c1::dsk/c1t0d0
root@ivmprod /$ cfgadm -al
Ap_Id                          Type         Receptacle   Occupant     Condition
c1                        scsi-bus     connected    configured   unknown
c1::dsk/c1t0d0            unavailable  connected    unconfigured unknown
c1::dsk/c1t1d0            disk         connected    configured   unknown
c1::dsk/c1t2d0            disk         connected    configured   unknown
c2                        scsi-bus     connected    unconfigured unknown
c3                        fc-fabric    connected    configured   unknown
c3::50060e8005437940      disk         connected    configured   failing
c3::50060e8005afca40      disk         connected    configured   failing
c4                        fc-fabric    connected    configured   unknown
c4::50060e8005481140      disk         connected    configured   unknown
c4::50060e8005bb9240      disk         connected    configured   unknown
c5                        fc-fabric    connected    configured   unknown
c5::50060e8005437950      disk         connected    configured   failing
c5::50060e8005afca50      disk         connected    configured   failing
c6                        fc-fabric    connected    configured   unknown
c6::50060e8005481150      disk         connected    configured   unknown
c6::50060e8005bb9250      disk         connected    configured   unknown
root@ivmprod /$
Now it is time to ask the DC engineer to pull out the faulty disk and insert the new disk. Get confirmation from the DC engineer before configuring the new disk at the OS level. We can also check the console logs to confirm whether the disk has been replaced successfully.
Below are the console logs from my case:
sc) showlogs

Log entries since JUL 05 14:31:48
----------------------------------
AUG 13 18:02:04 ivmprod: 0004004f: "Indicator HDD0.OK2RM is now ON"
AUG 13 18:10:12 ivmprod: 00040071: "DISK @ HDD0 has been removed."
AUG 13 18:10:16 ivmprod: 0004004f: "Indicator HDD0.OK2RM is now OFF"
AUG 13 18:10:38 ivmprod: 00060000: "SC Login: User admin Logged on."
AUG 13 18:10:48 ivmprod: 00040072: "DISK @ HDD0 has been inserted."
sc)
Configure the newly added disk in the OS:
root@ivmprod /$ cfgadm -c configure c1::dsk/c1t0d0
root@ivmprod /$ cfgadm -al
Ap_Id                     Type         Receptacle   Occupant     Condition
c1                        scsi-bus     connected    configured   unknown
c1::dsk/c1t0d0            disk         connected    configured   unknown
c1::dsk/c1t1d0            disk         connected    configured   unknown
c1::dsk/c1t2d0            disk         connected    configured   unknown
c2                        scsi-bus     connected    unconfigured unknown
c3                        fc-fabric    connected    configured   unknown
c3::50060e8005437940      disk         connected    configured   failing
c3::50060e8005afca40      disk         connected    configured   failing
c4                        fc-fabric    connected    configured   unknown
c4::50060e8005481140      disk         connected    configured   unknown
c4::50060e8005bb9240      disk         connected    configured   unknown
c5                        fc-fabric    connected    configured   unknown
c5::50060e8005437950      disk         connected    configured   failing
c5::50060e8005afca50      disk         connected    configured   failing
c6                        fc-fabric    connected    configured   unknown
c6::50060e8005481150      disk         connected    configured   unknown
c6::50060e8005bb9250      disk         connected    configured   unknown
root@ivmprod /$ devfsadm -v
root@ivmprod /$ echo|format |head
Searching for disks...done

AVAILABLE DISK SELECTIONS:
   0. c1t0d0 (SUN36G cyl 24620 alt 2 hd 27 sec 107)
      /pci@1c,600000/scsi@2/sd@0,0
   1. c1t1d0 (SEAGATE-ST557703LSUN36G-0307 cyl 24620 alt 2 hd 27 sec 107)
      /pci@1c,600000/scsi@2/sd@1,0
   2. c1t2d0 (SUN36G cyl 24620 alt 2 hd 27 sec 107)
      /pci@1c,600000/scsi@2/sd@2,0
root@ivmprod /$
Copy the VTOC from the secondary disk to the primary disk that we replaced:
root@ivmprod /$ prtvtoc /dev/rdsk/c1t1d0s2 |fmthard -s- /dev/rdsk/c1t0d0s2
fmthard:  New volume table of contents now in place.
root@ivmprod /$
Check and compare the VTOCs of both disks:
root@ivmprod /$ prtvtoc /dev/rdsk/c1t1d0s2
* /dev/rdsk/c1t1d0s2 partition map
*
* Dimensions:
*     512 bytes/sector
*     107 sectors/track
*      27 tracks/cylinder
*    2889 sectors/cylinder
*   24622 cylinders
*   24620 accessible cylinders
*
* Flags:
*   1: unmountable
*  10: read-only
*
*                          First     Sector    Last
* Partition  Tag  Flags    Sector     Count    Sector  Mount Directory
       0      2    00          0  50332158  50332157
       1      3    01   50332158  16779312  67111469
       2      5    00          0  71127180  71127179
       6      0    00   67111470   3489912  70601381
       7      0    00   70601382    525798  71127179
root@ivmprod /$ prtvtoc /dev/rdsk/c1t0d0s2
* /dev/rdsk/c1t0d0s2 partition map
*
* Dimensions:
*     512 bytes/sector
*     107 sectors/track
*      27 tracks/cylinder
*    2889 sectors/cylinder
*   24622 cylinders
*   24620 accessible cylinders
*
* Flags:
*   1: unmountable
*  10: read-only
*
*                          First     Sector    Last
* Partition  Tag  Flags    Sector     Count    Sector  Mount Directory
       0      2    00          0  50332158  50332157
       1      3    01   50332158  16779312  67111469
       2      5    00          0  71127180  71127179
       6      0    00   67111470   3489912  70601381
       7      0    00   70601382    525798  71127179
root@ivmprod /$
Create three metadb replicas on slice 7 (s7) of the new disk:
root@ivmprod /$ metadb -afc 3 /dev/rdsk/c1t0d0s7
root@ivmprod /$ metadb -i
        flags           first blk       block count
     a        u         16              1034            /dev/dsk/c1t0d0s7
     a        u         1050            1034            /dev/dsk/c1t0d0s7
     a        u         2084            1034            /dev/dsk/c1t0d0s7
     a    p  luo        16              1034            /dev/dsk/c1t1d0s7
     a    p  luo        1050            1034            /dev/dsk/c1t1d0s7
     a    p  luo        2084            1034            /dev/dsk/c1t1d0s7
o - replica active prior to last mddb configuration change
u - replica is up to date
l - locator for this replica was read successfully
c - replica's location was in /etc/lvm/mddb.cf
p - replica's location was patched in kernel
m - replica is master, this is replica selected as input
W - replica has device write errors
a - replica is active, commits are occurring to this replica
M - replica had problem with master blocks
D - replica had problem with data blocks
F - replica had format problems
S - replica is too small to hold current data base
R - replica had device read errors
root@ivmprod /$
Create the metadevices using the metainit command:
root@ivmprod /$ metainit d11 1 1 c1t0d0s0
d11: Concat/Stripe is setup
root@ivmprod /$ metainit d21 1 1 c1t0d0s1
d21: Concat/Stripe is setup
root@ivmprod /$ metainit d31 1 1 c1t0d0s6
d31: Concat/Stripe is setup
root@ivmprod /$
Check the details of the created metadevices:
root@ivmprod /$ metastat -p
d10 -m d12 1
d12 1 1 c1t1d0s0
d20 -m d22 1
d22 1 1 c1t1d0s1
d30 -m d32 1
d32 1 1 c1t1d0s6
d11 1 1 c1t0d0s0
d21 1 1 c1t0d0s1
d31 1 1 c1t0d0s6
root@ivmprod /$
Attach the newly created metadevices as submirrors to the main mirror metadevices:
root@ivmprod /$ metattach d10 d11
d10: submirror d11 is attached
root@ivmprod /$ metattach d20 d21
d20: submirror d21 is attached
root@ivmprod /$ metattach d30 d31
d30: submirror d31 is attached
root@ivmprod /$
Verify that the attached metadevices are correct:
root@ivmprod /$  metastat -p
d10 -m d11 d12 1
d11 1 1 c1t0d0s0
d12 1 1 c1t1d0s0
d20 -m d21 d22 1
d21 1 1 c1t0d0s1
d22 1 1 c1t1d0s1
d30 -m d31 d32 1
d31 1 1 c1t0d0s6
d32 1 1 c1t1d0s6
root@ivmprod /$
Check the syncing status:
root@ivmprod /$ metastat |grep %
    Resync in progress: 1 % done
    Resync in progress: 1 % done
    Resync in progress: 11 % done
root@ivmprod /$
After the sync has completed, make sure all MD devices are okay by checking the details below:
root@ivmprod /$ metastat -t |grep Maintenance
root@ivmprod /$

root@ivmprod /$ metastat | grep State
      State: Okay
      State: Okay
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
      State: Okay
      State: Okay
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
      State: Okay
      State: Okay
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
    State: Okay
        Device     Start Block  Dbase State        Hot Spare
root@ivmprod /$
Cool...!