check_oracle_health
check_oracle_health copied to clipboard
Patch for local SYSDBA connections and 2 basic Dataguard standby checks
I updated my earlier modification to include two checks that I wrote for an Oracle dataguard standby. I'm not too familiar with the pull request system but I'll try to figure it out and see if that's how to submit a proper patch. For now feel free to review this:
--- check_oracle_health 2013-03-21 14:49:52.000000000 -0500
+++ check_oracle_health_dts 2013-04-23 16:55:34.000000000 -0500
@@ -2084,6 +2084,87 @@
}
+package DBD::Oracle::Server::Database::Dataguard;
+
+use strict;
+
+our @ISA = qw(DBD::Oracle::Server::Database);
+
+# Constructor for the Dataguard standby check object.
+# Takes the plugin's named parameters as a flat key/value list, copies the
+# ones this object keeps, creates empty slots for the query results and then
+# calls init() with the same parameters.
+# Returns the blessed object.
+sub new {
+  my ($class, %params) = @_;
+  my $self = bless {}, $class;
+  # Settings handed in by the caller.
+  my @passed_through = qw(verbose handle name warningrange criticalrange);
+  @{$self}{@passed_through} = @params{@passed_through};
+  # Result slots; init() fills the pair that belongs to the requested mode.
+  @{$self}{qw(last_applied_time lag_minutes mrp_process mrp_status)} =
+      (undef) x 4;
+  $self->init(%params);
+  return $self;
+}
+
+# Runs the query that belongs to the requested mode and stores its result.
+#   ...::dataguard::lag        - time of the newest applied archived log
+#                                registered by RFS, and its age in minutes
+#   ...::dataguard::mrp_status - process name and status of the first
+#                                v$managed_standby row whose process name
+#                                starts with MR
+# A missing result is recorded as CRITICAL right away.
+sub init {
+  my ($self, %params) = @_;
+  my $mode = $params{mode};
+  $self->init_nagios();
+  if ($mode =~ /server::database::dataguard::lag/) {
+    # The SQL text below is kept exactly as it was, whitespace included.
+    my @lag_row = $self->{handle}->fetchrow_array(q{
+ select to_char(max(first_time),'YYYYMMDDHH24MISS')
+ , ceil((sysdate-max(first_time))*24*60)
+ from v$archived_log
+ where applied='YES' and registrar='RFS'
+ });
+    @{$self}{qw(last_applied_time lag_minutes)} = @lag_row[0, 1];
+    if (! defined $self->{last_applied_time}) {
+      $self->add_nagios_critical("Unable to get archived log apply time");
+    }
+  } elsif ($mode =~ /server::database::dataguard::mrp_status/) {
+    my @mrp_row = $self->{handle}->fetchrow_array(q{
+ select process, status
+ from v$managed_standby
+ where process like 'MR%'
+ });
+    @{$self}{qw(mrp_process mrp_status)} = @mrp_row[0, 1];
+    if (! defined $self->{mrp_process}) {
+      $self->add_nagios_critical("Unable to find MRP process, managed recovery may be stopped");
+    }
+  }
+}
+
+# Turns the values collected by init() into a Nagios level, a message and
+# performance data for the requested mode. Nothing is added when a non-zero
+# nagios_level is already present on the object.
+#   ...::dataguard::lag        - lag_minutes is checked with check_thresholds
+#                                (60 / 120 are passed as the fallback ranges)
+#   ...::dataguard::mrp_status - APPLYING_LOG is OK, WAIT_FOR_LOG is WARNING,
+#                                every other status is CRITICAL
+sub nagios {
+  my $self = shift;
+  my %params = @_;
+  if (! $self->{nagios_level}) {
+    if ($params{mode} =~ /server::database::dataguard::lag/) {
+      $self->add_nagios(
+          $self->check_thresholds($self->{lag_minutes}, "60", "120"),
+          sprintf "Dataguard standby lag %d minutes.", $self->{lag_minutes});
+      # Drop range colons so the thresholds print as plain numbers below.
+      $self->{warningrange} =~ s/://g;
+      $self->{criticalrange} =~ s/://g;
+      $self->add_perfdata(sprintf "dataguard_lag=%d;%d;%d",
+          $self->{lag_minutes},
+          $self->{warningrange}, $self->{criticalrange});
+    } elsif ($params{mode} =~ /server::database::dataguard::mrp_status/) {
+      my $mrp_message = sprintf "Dataguard managed recovery process %s status is %s.", $self->{mrp_process}, $self->{mrp_status};
+      # Numeric code for the performance data: 0 = applying, 1 = waiting for
+      # log, 2 = any other status. Performance data values have to be
+      # numeric, so the status name itself cannot be emitted there; it stays
+      # readable in the message.
+      my $mrp_state;
+      if ($self->{mrp_status} eq "APPLYING_LOG") {
+        $mrp_state = 0;
+        $self->add_nagios_ok($mrp_message);
+      } elsif ($self->{mrp_status} eq "WAIT_FOR_LOG") {
+        # May wrap this into OK status as well
+        $mrp_state = 1;
+        $self->add_nagios_warning($mrp_message);
+      } else {
+        $mrp_state = 2;
+        $self->add_nagios_critical($mrp_message);
+      }
+      $self->add_perfdata(sprintf "dataguard_mrp_status=%d", $mrp_state);
+    }
+  }
+}
+
package DBD::Oracle::Server::Database::FlashRecoveryArea;
@@ -3634,6 +3715,8 @@
} else {
$self->add_nagios_critical("unable to aquire flash recovery area info");
}
+ } elsif ($params{mode} =~ /server::database::dataguard/) {
+ $self->{dataguard} = DBD::Oracle::Server::Database::Dataguard->new(%params);
} elsif ($params{mode} =~ /server::database::invalidobjects/) {
$self->init_invalid_objects(%params);
} elsif ($params{mode} =~ /server::database::stalestats/) {
@@ -3770,6 +3853,9 @@
$_->nagios(%params);
$self->merge_nagios($_);
}
+ } elsif ($params{mode} =~ /server::database::dataguard/) {
+ $self->{dataguard}->nagios(%params);
+ $self->merge_nagios($self->{dataguard});
} elsif ($params{mode} =~ /server::database::invalidobjects/) {
my @message = ();
push(@message, sprintf "%d invalid objects",
@@ -4739,6 +4825,7 @@
use strict;
use Net::Ping;
+use DBD::Oracle qw(:ora_session_modes);
our @ISA = qw(DBD::Oracle::Server::Connection);
@@ -4803,15 +4890,17 @@
alarm($self->{timeout} - 1); # 1 second before the global unknown timeout
my $dsn = sprintf "DBI:Oracle:%s", $self->{connect};
my $connecthash = { RaiseError => 0, AutoCommit => $self->{commit}, PrintError => 0 };
+ my $username = $self->{username};
if ($self->{username} eq "sys" || $self->{username} eq "sysdba") {
$connecthash = { RaiseError => 0, AutoCommit => $self->{commit}, PrintError => 0,
- #ora_session_mode => DBD::Oracle::ORA_SYSDBA
- ora_session_mode => 0x0002 };
+ ora_session_mode => ORA_SYSDBA };
$dsn = sprintf "DBI:Oracle:";
+ $username = '';
}
+ $self->debug("Connecting to " . $dsn . " as " . $self->{username});
if ($self->{handle} = DBI->connect(
$dsn,
- $self->{username},
+ $username,
$self->{password},
$connecthash)) {
$self->{handle}->do(q{
@@ -5933,6 +6022,12 @@
['server::instance::sysstat::rate',
'sysstat', undef,
'change of sysstat values over time' ],
+ ['server::database::dataguard::lag',
+ 'dataguard-lag', undef,
+ 'Dataguard standby lag' ],
+ ['server::database::dataguard::mrp_status',
+ 'dataguard-mrp-status', undef,
+ 'Dataguard standby MRP status' ],
['server::database::flash_recovery_area::usage',
'flash-recovery-area-usage', undef,
'Used space in flash recovery area' ],
Looks like the source code currently in the repo has undergone some big architecture changes. I've emailed G. Lausser to see if I understand it properly before I make changes to my repo clone and submit anything.