SCOM SQL MP Database monitor unhealthy

Harman Kardon 121 Reputation points
2020-10-23T08:23:34.15+00:00

I have a alwayson cluster setup on a testserver.
Although the database is fine I get the following in scom? Any idea on why? Latest SQL MP

Alert: MSSQL on Windows: Database is in offline/recovery pending/suspect/emergency state

MonitoringStatus
Bad

DatabaseState
ONLINE

IsAccessible
false

IsMirroringMirror
false

IsAlwaysOnReplica
true

ErrorCode
0

ErrorDescription

Operations Manager
Operations Manager
A family of System Center products that provide infrastructure monitoring, help ensure the predictable performance and availability of vital applications, and offer comprehensive monitoring for datacenters and cloud, both private and public.
1,417 questions
0 comments No comments
{count} votes

Accepted answer
  1. Harman Kardon 121 Reputation points
    2020-10-23T11:03:18.213+00:00

    I think I might have solved it. At least the errors went away and has not come back yet.
    I looked at the script above and saw a reference to hadr.
    When I granted the connect right to the hadr_endpoint to the monitoringaccount (nt service\healthservice). The errors went away.

    0 comments No comments

2 additional answers

Sort by: Most helpful
  1. CyrAz 5,181 Reputation points
    2020-10-23T09:22:01.163+00:00

    Long story short : this monitor is based on a SQL query which is stored in a dll which is embedded in the management pack (Microsoft.SQLServer.Windows.Module.Monitoring.dll).
    You can decompile it to find the actual query (it's a Resource named DBStatuses), and then try to run it manually against your database to find why/where it's failing :

    IF OBJECT_ID ('tempdb.dbo.#StatusTable') IS NOT NULL DROP TABLE #StatusTable;
    CREATE TABLE #StatusTable (
     DatabaseName sysname,
     MonitoringStatus nvarchar(20),
     DatabaseState nvarchar(60),
     IsAccessible bit,
     IsMirroringMirror bit,
     IsAlwaysOnReplica bit,
     ErrorCode int, 
     ErrorDescription nvarchar(4000),
     CONSTRAINT IX_StatusTable UNIQUE (DatabaseName)
     );
    
    IF OBJECT_ID ('tempdb.dbo.#MirroringData') IS NOT NULL DROP TABLE #MirroringData;
    CREATE TABLE #MirroringData (
     database_id INT
     ,mirroring_state TINYINT
     );
    
    IF OBJECT_ID ('tempdb.dbo.#AlwaysOnData') IS NOT NULL DROP TABLE #AlwaysOnData;
    CREATE TABLE #AlwaysOnData (
     database_id INT
     ,db_is_primary_replica BIT
     ,role_allow_connections TINYINT
     ,db_suspended_state INT
     ,database_state TINYINT
     ,database_state_desc nvarchar(60)
     );
    
    DECLARE @HasAlwaysOn BIT;
    SET @HasAlwaysOn = (
     SELECT TOP 1 
     CASE 
     WHEN OBJECT_ID('sys.availability_replicas') IS NOT NULL THEN 1
     ELSE 0
     END AS HasAlwaysOn
     FROM master.sys.syscolumns columns
     WHERE [name] = 'replica_id' AND id = OBJECT_ID('sys.databases')
     )
    
    IF @HasAlwaysOn IS NOT NULL AND @HasAlwaysOn != 0
    BEGIN
     DECLARE @AlwaysOnQuery varchar(max)
    
     IF SERVERPROPERTY('ProductMajorVersion') > 11
     SET @AlwaysOnQuery = '
     SELECT 
     d2.database_id
     ,drs.is_primary_replica AS db_is_primary_replica
     ,CASE 
     WHEN drs.is_primary_replica = 1 THEN ar.primary_role_allow_connections
     ELSE ar.secondary_role_allow_connections
     END AS role_allow_connections
     ,CASE 
     WHEN drs.is_suspended = 0 THEN -1
     ELSE suspend_reason
     END AS db_suspended_state
     ,drs.database_state
     ,drs.database_state_desc
     FROM sys.databases AS d2
     JOIN sys.dm_hadr_database_replica_states drs ON drs.database_id = d2.database_id
     JOIN sys.availability_replicas ar ON d2.replica_id = ar.replica_id
     WHERE drs.is_local = 1;'
     ELSE
     SET @AlwaysOnQuery = '
     SELECT 
     d2.database_id
     ,CASE 
     WHEN hars.[role] = 1 THEN 1 
     ELSE 0 
     END AS db_is_primary_replica
     , CASE  
     WHEN hars.[role] = 1 THEN ar.primary_role_allow_connections 
     WHEN hars.[role] = 2 THEN ar.secondary_role_allow_connections 
     ELSE 0 
     END AS role_allow_connections 
     ,CASE 
     WHEN drs.is_suspended = 0 THEN -1
     ELSE suspend_reason
     END AS db_suspended_state
     ,drs.database_state
     ,drs.database_state_desc
     FROM sys.databases AS d2
     JOIN sys.dm_hadr_database_replica_states drs WITH(NOLOCK) ON drs.database_id = d2.database_id
     JOIN sys.availability_replicas ar WITH(NOLOCK) ON d2.replica_id = ar.replica_id
     JOIN sys.dm_hadr_availability_replica_states hars WITH(NOLOCK) ON d2.replica_id = hars.replica_id
     JOIN sys.servers s WITH(NOLOCK) ON s.name = ar.replica_server_name AND s.server_id = 0
     WHERE (drs.is_local = 1 OR (drs.is_local IS NULL AND hars.[role] != 1 AND hars.[role] != 2))'
    
     INSERT INTO #AlwaysOnData
     EXEC (@AlwaysOnQuery)
    END
    
    IF OBJECT_ID('sys.database_mirroring', 'V') IS NOT NULL
     INSERT INTO #MirroringData
     SELECT 
     database_id
     ,mirroring_state
     FROM sys.database_mirroring
     WHERE mirroring_role = 2;
    
    INSERT INTO #StatusTable SELECT 
     [name] AS [DatabaseName]
     ,CASE 
     --db is online and ready to accept connections
     WHEN [state] = 0 AND db_hasaccess = 1
     THEN 'Good'
     --db has just come online and is not ready to accept connections
     WHEN [state] = 0 AND db_hasaccess = 0 AND dv.collation_name IS NULL
     THEN 'NeitherGoodNorBad'
     WHEN [state] = 0 AND db_hasaccess = 0 AND collation_name IS NOT NULL AND has_mirroring_mirror = 0 AND has_alwayson = 0 AND user_access = 1
     THEN 'Good'
     --user does not have access due to lack of permissions
     WHEN [state] = 0 AND db_hasaccess = 0 AND collation_name IS NOT NULL AND has_mirroring_mirror = 0 AND has_alwayson = 0 AND user_access != 1
     THEN 'NeedToCheck'
     WHEN dv.[state] = 2
     THEN 'NeitherGoodNorBad'
     WHEN [state] IN (3, 4, 5, 6)
     THEN 'Bad'
     WHEN dv.is_in_standby = 1 AND is_read_only = 0
     THEN 'Bad'
     --Mirroring
     WHEN dv.[state] = 1 AND has_mirroring_mirror = 1
     THEN 'Good'
     WHEN dv.[state] = 1 AND has_mirroring_mirror = 0
     THEN 'NeedToCheck'
     --AlwaysOn
     WHEN [state] = 0 AND db_hasaccess = 0 AND has_alwayson = 1 AND db_is_primary_replica = 1
     THEN 'Bad'
     WHEN [state] = 0 AND db_hasaccess = 0 AND has_alwayson = 1 AND db_is_primary_replica = 0 AND role_allow_connections > 0
     THEN 'Bad'
     WHEN [state] = 0 AND has_alwayson = 1 AND db_is_primary_replica = 0 AND role_allow_connections = 0
     THEN 'Good'
     ELSE 'NeedToCheck'
     END AS [MonitoringStatus]
     ,dv.state_desc AS [DatabaseState]
     ,db_hasaccess AS [IsAccessible]
     ,CAST(has_mirroring_mirror AS BIT) AS [IsMirroringMirror]
     ,CAST(has_alwayson AS BIT) AS [IsAlwaysOnReplica]
     ,0 AS [ErrorCode]
     ,'' AS [ErrorDescription]
    FROM (
     SELECT 
     d.database_id
     ,d.[name]
     ,d.collation_name
     ,d.is_in_standby
     ,d.is_read_only
     ,d.user_access
     ,d.[state]
     ,d.[state_desc]
     ,CAST(HAS_DBACCESS(d.[name]) AS BIT) AS db_hasaccess
     ,CASE 
     WHEN dm.mirroring_state IS NULL THEN 0
     ELSE 1
     END AS has_mirroring_mirror
     ,CASE 
     WHEN aond.database_id IS NULL THEN 0
     ELSE 1
     END AS has_alwayson
     --Mirroring fields
     ,dm.mirroring_state
     --AlwaysOn fields
     ,aond.db_is_primary_replica
     ,aond.db_suspended_state
     ,aond.role_allow_connections
     ,aond.database_state
     FROM sys.databases d
     LEFT OUTER JOIN #MirroringData dm ON dm.database_id = d.database_id
     LEFT OUTER JOIN #AlwaysOnData aond ON aond.database_id = d.database_id
     ) dv;
    
    DECLARE @dbname sysname, @sql nvarchar(2000)
    
    DECLARE temp CURSOR LOCAL FOR 
    SELECT DatabaseName FROM #StatusTable WHERE MonitoringStatus = 'NeedToCheck';
    
    OPEN temp;
    
    FETCH NEXT FROM temp INTO @dbname;
    
    WHILE @@Fetch_Status=0 BEGIN
     SET @sql = N'USE '+ QUOTENAME(@dbname ,'"');
     BEGIN TRY
     EXEC (@sql);
     END TRY
     BEGIN CATCH
     UPDATE #StatusTable SET MonitoringStatus = 'NeitherGoodNorBad', ErrorCode = @@ERROR, ErrorDescription = ERROR_MESSAGE() WHERE DatabaseName = @dbname;
     END CATCH;
     FETCH NEXT FROM temp INTO @dbname;
    End
    
    CLOSE temp;
    DEALLOCATE temp;
    
    SELECT * FROM #StatusTable
    
    2 people found this answer helpful.

  2. SChalakov 10,261 Reputation points MVP
    2020-10-23T08:34:09.21+00:00

    Hey,

    I have the same case and what I could find out that there is a query run in the master DB, which gets all the values as property bags (all tha values you have listed) and based on those you get the alert.
    The thing is that in my case only one of the SQL Always On node is spiting thius out and I am not quite sure how exactly to troubleshoot it.
    I have an idea who might help us both with more information on tjhis, let me see what I can do.

    Regards,
    Stoyan

    1 person found this answer helpful.