通常,在任何 Solaris 系统中,磁盘上出现越来越多的硬错误表示磁盘已发生故障或者即将发生故障。
如果 iostat -e 报告的错误计数为非零值,那么在重新启动系统之前,这些计数不会被重置。
例如,当我们用新磁盘替换故障磁盘之后,旧磁盘留下的错误计数仍然会显示出来。
该脚本适用于 Solaris 10 和 11。
此 perl 脚本支持重置 sd 和 ssd kstat 错误计数器。
但请注意以下 Oracle 支持人员给出的警告。
注意:重置这些值的最安全方法是重新启动系统!如果我们使用以下脚本,我们需要自担风险。
检查磁盘上 io 错误的一般命令是:
iostat -e
          ---- errors ---
device  s/w  h/w  trn  tot
sd3       0    0    1    1
sd4       0    0    0    0
sd5       0    0    0    0
nfs1      0    0    0    0
nfs5      0    0    0    0
软错误:磁盘扇区CRC校验失败,需要重新读取
硬错误:CRC 校验重读多次失败
传输错误:I/O 总线报告的错误
总错误数:软错误 + 硬错误 + 传输错误
以 root 用户身份执行此脚本,如下所示:
用法 :
# ./iostat-E_reset.pl
Usage: iostat-E_reset.pl [sd|ssd] [instance number] [type] [type]...
 type values are hard, illrq, media, nodev, ntrdy, pfa recov, soft, and tran,
 type "all" can be used to reset all of the above
 type "io" can be used to reset soft, hard, and tran errors
例子 :
# ./iostat-E_reset.pl sd 3 io
Resetting sd_transerrs    for instance     3, current value 0x1
Resetting sd_softerrs     for instance     3, current value 0x0
Resetting sd_harderrs     for instance     3, current value 0x0
之路 on it Road.com
脚本
该 perl 脚本的基本原理是通过 mdb 在线修改正在运行的内核,从而重置磁盘上的 I/O 错误计数器。
#!/usr/bin/perl
# Oracle Corp Inc.
#
# This script uses mdb to reset the error counters of a LUN as described in the %types
# hash table below.
#
# This script supports Solaris versions 10 and 11.
#
# The following document is the reference for this script.
# (Doc ID 1012731.1) How to Reset the iostat -E Error Counters Without Rebooting
#
# THIS SCRIPT MODIFIES THE RUNNING KERNEL - USE IS AT YOUR OWN RISK.
#
# Date: 1/8/2014
#
# NOTE(review): this listing was reconstructed from a copy in which every
# backslash escape (\n, \d, \s, \p{...}, \") and every regex capture variable
# ($1, $2) had been stripped. The restored tokens should be confirmed against
# the script attached to the Oracle document referenced above.

use strict;
use warnings;
use integer;
use IPC::Open3;

my $mdb    = "/usr/bin/mdb";
my $os_rev = `/usr/bin/uname -r`;
my $drv;           # driver name given on the command line: "sd" or "ssd"
my $inst;          # driver instance number given on the command line
my $mdb_resp;      # most recent line read back from mdb
my $soft_state;    # address printed by "*<drv>_state::softstate"
my $errstats;      # un_errstats pointer read from struct sd_lun
my $ks_data;       # ks_data pointer read from the kstat_t

# Command-line selectable error types; a value of 1 means "reset requested".
my %types = (
    "hard"  => 0,    # Hard Errors
    "illrq" => 0,    # Illegal Request Errors
    "media" => 0,    # Media Errors
    "nodev" => 0,    # No Device Errors
    "ntrdy" => 0,    # Device Not Ready Errors
    "pfa"   => 0,    # Predictive Failure Analysis Errors
    "recov" => 0,    # Recoverable Errors
    "soft"  => 0,    # Soft Errors
    "tran"  => 0,    # Transport Errors
    "all"   => 0,    # Reset all of the above
    "io"    => 0     # Reset hard, soft, and tran errors
);

# Maps each concrete error type to its member name in struct sd_errstats.
my %trans = (
    "hard"  => "sd_harderrs",
    "illrq" => "sd_rq_illrq_err",
    "media" => "sd_rq_media_err",
    "nodev" => "sd_rq_nodev_err",
    "ntrdy" => "sd_rq_ntrdy_err",
    "pfa"   => "sd_rq_pfa_err",
    "recov" => "sd_rq_recov_err",
    "soft"  => "sd_softerrs",
    "tran"  => "sd_transerrs"
);

chomp($os_rev);

# Print the usage text on STDERR and terminate with exit status 22.
sub usage {
    print STDERR "Usage: iostat-E_reset.pl [sd|ssd] [instance number] [type] [type]...\n";
    print STDERR " type values are hard, illrq, media, nodev, ntrdy, pfa recov, soft, and tran,\n";
    print STDERR " type \"all\" can be used to reset all of the above\n";
    print STDERR " type \"io\" can be used to reset soft, hard, and tran errors\n";
    exit 22;
}

# Send one command line to mdb and return the first line of its reply
# (trailing newline included). Dies if mdb has closed its output, so callers
# never pattern-match against undef.
sub mdb_query {
    my ($command) = @_;

    print MDB_WRT "$command\n";
    my $reply = <MDB_RD>;
    die "ERROR: mdb closed its output while running '$command'\n"
        unless defined $reply;
    return $reply;
}

# Zero one error counter of the selected LUN.
#   $_[0] - member name inside struct sd_errstats (a value of %trans)
# Dies with a diagnostic if mdb's replies do not have the expected shape, if
# the kstat data type is not 0x2, or if the write is not confirmed.
sub reset_counter {
    my ($counter) = @_;

    # Only counters whose data_type prints as 0x2 are written through
    # value.ui32 below; anything else is refused.
    $mdb_resp = mdb_query("${ks_data}::print struct sd_errstats $counter.data_type");
    if ($mdb_resp =~ /^\Q$counter\E\.data_type = (0x\p{XDigit}+)$/) {
        my $ks_type = $1;
        if ($ks_type ne "0x2") {
            die "ERROR: Unsupported kstat data type $ks_type for $counter\n";
        }
    }
    else {
        print STDERR "ERROR: Reading data_type value for ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
        die "       Response: $mdb_resp\n";
    }

    # "-a" makes mdb prefix the member with its address, which is the
    # location that gets overwritten.
    $mdb_resp = mdb_query("${ks_data}::print -a struct sd_errstats $counter.value.ui32");
    if ($mdb_resp =~ /^(\p{XDigit}+) \Q$counter\E\.value\.ui32 = (?:0x)?(\p{XDigit}+)$/) {
        my $kstat_addr = $1;
        my $current    = hex($2);    # captured text is hex digits, not decimal

        printf("Resetting %-15s for instance %5s, current value 0x%x\n",
            $counter, $inst, $current);

        $mdb_resp = mdb_query("${kstat_addr}/W 0");
        if ($mdb_resp !~ /^0x${kstat_addr}:\s+(?:0x)?\p{XDigit}+\s+=\s+0x0$/) {
            print STDERR "ERROR: Failed to write kstat counter address $kstat_addr,\n";
            print STDERR "       ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
            die "       Response: $mdb_resp\n";
        }
    }
    else {
        print STDERR "ERROR: Unable to obtain kstat counter address for $counter reset,\n";
        print STDERR "       ks_data $ks_data un_errstats $errstats softstate $soft_state\n";
        die "       Response: $mdb_resp\n";
    }
    return;
}

# ---- Argument handling -----------------------------------------------------

usage() if @ARGV < 3 or $ARGV[0] !~ /^s?sd$/ or $ARGV[1] !~ /^\d+$/;

$drv  = shift @ARGV;
$inst = shift @ARGV;

foreach my $requested (@ARGV) {
    usage() if !defined $types{$requested};
    $types{$requested} = 1;
}

if ($os_rev !~ /^5\.(10|11)/) {
    die "Solaris version $os_rev is not supported.\n";
}

# $> is the effective UID.
if ($> != 0) {
    die "You must be user root to run this script.\n";
}

# ---- Locate the kstat data for the requested instance ----------------------

# The empty string as third argument makes mdb's stderr arrive on MDB_RD as
# well, which is how the "instance ... unused" message below can be read.
open3(*MDB_WRT, *MDB_RD, "", "$mdb -kw") or die "Cannot execute mdb";

$mdb_resp = mdb_query("*${drv}_state::softstate 0t${inst}");
if ($mdb_resp =~ /^(\p{XDigit}+)$/) {
    $soft_state = $1;
}
elsif ($mdb_resp =~ /^mdb: instance \p{XDigit}+ unused$/) {
    die "ERROR: Instance $inst is unused\n";
}
else {
    print STDERR "ERROR: Reading softstate pointer for instance $inst\n";
    die "       Response: $mdb_resp\n";
}

$mdb_resp = mdb_query("${soft_state}::print struct sd_lun un_errstats");
if ($mdb_resp =~ /^un_errstats = 0x(\p{XDigit}+)$/) {
    $errstats = $1;
}
else {
    print STDERR "ERROR: Reading un_errstats pointer for softstate $soft_state\n";
    die "       Response: $mdb_resp\n";
}

$mdb_resp = mdb_query("${errstats}::print kstat_t ks_data");
if ($mdb_resp =~ /^ks_data = 0x(\p{XDigit}+)$/) {
    $ks_data = $1;
}
else {
    print STDERR "ERROR: Reading ks_data pointer for un_errstats $errstats softstate $soft_state\n";
    die "       Response: $mdb_resp\n";
}

# ---- Reset the requested counters ------------------------------------------

# "io" is shorthand for the three counters shown by "iostat -e".
if ($types{"io"}) {
    $types{"hard"} = 1;
    $types{"soft"} = 1;
    $types{"tran"} = 1;
}

# Iterating %trans (not %types) skips the pseudo-types "all" and "io";
# sorting keeps the reset order stable between runs.
foreach my $type (sort keys %trans) {
    reset_counter($trans{$type}) if $types{"all"} or $types{$type};
}

exit 0;
日期:2020-09-17 00:15:16 来源:oir作者:oir