troubleshooting Question

SQL Monitoring job not allowing the job to start again

Avatar of VIVEKANANDHAN_PERIASAMY
VIVEKANANDHAN_PERIASAMY asked on
Microsoft SQL ServerMicrosoft SQL Server 2005Microsoft SQL Server 2008
16 Comments2 Solutions944 ViewsLast Modified:
SQL_Job_Monitor JOB monitors the jobs which are running morethan 30 mins and will stop and start the job again.
Problem is ,after starting the SQL job,it again stopping it.I don't to stop it ,SQL_Job_Monitor JOB monitors job shouldn't disturb it again.

I have sql agent which will call this store proc
USE [userdatabase]
GO
/****** Object:  StoredProcedure [dbo].[test]    Script Date: 07/04/2012 16:50:44 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER procedure [dbo].[test] 
as 
begin 
waitfor delay '00:35:00'
end

Exec msdb.dbo.SQL_Job_Monitor 30, 0,'<ExcludeJob><JobName JOB="Export TMF PerfData"/></ExcludeJob>'
USE [msdb]
GO
/****** Object:  StoredProcedure [dbo].[SQL_Job_Monitor]    Script Date: 07/04/2012 16:46:13 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
ALTER PROCEDURE [dbo].[SQL_Job_Monitor] (
          @threshold_minute int = 30,       -- Default is set to 60 minutes
          @StopJob bit = 0,                  -- Default is not to stop the job 
          @ExcludeJOb XML =NULL -- '<ExcludeJob><JobName JOB="SQL SERVER JOB"/></ExcludeJob>'
          )
 
AS
 
SET NOCOUNT ON
 
DECLARE @is_sysadmin     INT
DECLARE @job_owner       SYSNAME
DECLARE @job_id         UNIQUEIDENTIFIER
DECLARE @JobName         VARCHAR(255)
DECLARE @JobStartTime    DateTime
DECLARE @JobRunningTime bigINT
DECLARE @Msg            VARCHAR(1000)
DECLARE @ExcludeJObTable TABLE (JobName VARCHAR(1000))
DECLARE @Body varchar(max)
DECLARE	@TableHead varchar(max)
DECLARE	@Table_Content varchar(max)
DECLARE @TableTail varchar(max)
DECLARE @mail_subject VARCHAR(255)
 
/** Insert the jobs to be excluded from job monitoring  */
INSERT @ExcludeJObTable (JobName)
SELECT xmllist.rows.value('@JOB','varchar(1000)') 
from @ExcludeJOb.nodes('/ExcludeJob/JobName') xmllist(rows)

-- Step 1: Create intermediate work tables to store the running job data
CREATE TABLE #xp_results (job_id                UNIQUEIDENTIFIER NOT NULL,
                       last_run_date         INT              NOT NULL,
                       last_run_time         INT              NOT NULL,
                       next_run_date         INT              NOT NULL,
                       next_run_time         INT              NOT NULL,
                       next_run_schedule_id  INT              NOT NULL,
                       requested_to_run      INT              NOT NULL, -- BOOL
                       request_source        INT              NOT NULL,
                       request_source_id     sysname          NULL,
                       running               INT              NOT NULL, -- BOOL
                       current_step          INT              NOT NULL,
                       current_retry_attempt INT              NOT NULL,
                       job_state             INT              NOT NULL)

CREATE TABLE #xp_results_new (job_id                UNIQUEIDENTIFIER NOT NULL,
                       last_run_date         INT              NOT NULL,
                       last_run_time         INT              NOT NULL,
                       next_run_date         INT              NOT NULL,
                       next_run_time         INT              NOT NULL,
                       next_run_schedule_id  INT              NOT NULL,
                       requested_to_run      INT              NOT NULL, -- BOOL
                       request_source        INT              NOT NULL,
                       request_source_id     sysname          NULL,
                       running               INT              NOT NULL, -- BOOL
                       current_step          INT              NOT NULL,
                       current_retry_attempt INT              NOT NULL,
                       job_state             INT              NOT NULL)
 
-- Step 2: Capture job execution information (for local jobs only since that's all SQLServerAgent caches)
SELECT @is_sysadmin = ISNULL(IS_SRVROLEMEMBER(N'sysadmin'), 0)
SELECT @job_owner = SUSER_SNAME() 
IF ((@@microsoftversion / 0x01000000) >= 8) -- SQL Server 8.0 or greater
BEGIN
    INSERT INTO #xp_results
    EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner, @job_id
END
ELSE
BEGIN
     INSERT INTO #xp_results
     EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner
END
 
-- Step 3: Get the current job that exceeded the monitoring threshold
 
DECLARE joblist_cursor CURSOR FOR
SELECT j.name, a.job_id, a.start_execution_date, DATEDIFF(MINUTE,a.start_execution_date,GETDATE()) AS JobRunningTime
FROM #xp_results x
INNER JOIN msdb.dbo.sysjobactivity a
ON x.job_id = a.job_id
INNER join msdb.dbo.sysjobs j
ON a.job_id = j.job_id
     WHERE x.running = 1                                               -- The job is running
     AND j.name  not IN  (SELECT Jobname from @ExcludeJObTable)               --Excluding the job Monitoring
     AND a.session_id = (select max(session_id) from msdb.dbo.sysjobactivity where job_id = a.job_id)
     AND a.stop_execution_date is null                                 -- The job is not complete 
--     AND a.start_execution_date <= dateadd(minute, -@threshold_minute, getdate())     -- Measure the thresdhold
 
OPEN joblist_cursor
 
FETCH NEXT FROM joblist_cursor
INTO @JobName, @job_id, @JobStartTime, @JobRunningTime
 
WHILE @@FETCH_STATUS = 0
BEGIN
     -- Check again if the same job at the starting time is still active
     IF EXISTS (SELECT TOP 1 * FROM msdb.dbo.sysjobactivity WHERE job_id = @job_ID AND stop_execution_date is NULL AND  
     @JobRunningTime>@threshold_minute)
     BEGIN
		
			PRINT 'Stopping job [' + @Jobname + ']'
			EXEC msdb..sp_stop_job @job_name=@JobName
			DECLARE @i INT
			SET @i=1
			INSERT INTO #xp_results_new
			EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner, @job_id			
			WHILE EXISTS (SELECT running
			FROM #xp_results_new x
			JOIN msdb.dbo.sysjobs sj ON x.job_id=sj.job_id
			WHERE running=1 OR job_state<>4)
			BEGIN
				IF @i>99
				BEGIN
					SELECT @Msg='The SQL Job - [' + @JobName + '] on ' + @@ServerName + ' has exceeded the job running threshold of ' + CONVERT(varchar(3), @threshold_minute) + ' minutes.'+
					' Job doesn''t stop in timely fashion. Number of trials to stop it: '+CAST(@i AS CHAR(3))
					RAISERROR (@Msg, 16, 1) WITH LOG
					GOTO Failed
				END
				TRUNCATE TABLE #xp_results_new
				PRINT 'It seems that job '+@JobName+' is still running.'
				PRINT 'Attempting to stop it again ... (trial no '+CAST(@i AS CHAR(2))+')'
				EXEC msdb..sp_stop_job @job_name=@JobName
				WAITFOR DELAY '00:00:10'
				INSERT INTO #xp_results_new
				EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner, @job_id
				SET @i=@i+1
			END
			TRUNCATE TABLE #xp_results_new
			PRINT 'Starting job [' + @Jobname + ']'
			EXEC msdb..sp_start_job @job_name=@JobName
			WAITFOR DELAY '00:00:10'
			
			INSERT INTO #xp_results_new
			EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner, @job_id
			
			IF EXISTS (SELECT 1 FROM #xp_results_new)
				BEGIN
					INSERT INTO [msdb].[dbo].[jobs_stats] (job_id, job_name, job_status, current_step, current_retry_attempt, job_state, entry_date, msg)
					SELECT x.job_id,
					sj.name as job_name,
					CASE running
						WHEN 0 THEN 'Stopped'
						WHEN 1 THEN 'Running'
					END AS job_status,
					current_step ,
					current_retry_attempt ,
					CASE job_state
						WHEN 0 THEN 'Not idle or suspended'
						WHEN 1 THEN 'Executing'
						WHEN 2 THEN 'Waiting For Thread'
						WHEN 3 THEN 'Between Retries'
						WHEN 4 THEN 'Idle'
						WHEN 5 THEN 'Suspended'
						WHEN 6 THEN 'Waiting For Step To Finish'
						WHEN 7 THEN 'Performing Completion Actions'
					END AS job_state,
					GETDATE() AS entry_date,
					'The SQL Job - [' + @JobName + '] on ' + @@ServerName + ' was stopped and started successfully with '+CAST(@i AS CHAR(2))+' stop trial' AS msg
					FROM #xp_results_new x
					JOIN msdb.dbo.sysjobs sj ON x.job_id=sj.job_id
					TRUNCATE TABLE #xp_results_new;
				END      

			IF EXISTS (SELECT TOP 1 * FROM msdb.dbo.sysjobactivity WHERE job_id = @job_ID AND stop_execution_date is NULL AND  
		    DATEDIFF(MINUTE,start_execution_date,GETDATE())>@threshold_minute)
		 
			BEGIN
				SELECT @Msg = 'The SQL Job - [' + @JobName + '] on ' + @@ServerName + ' has exceeded the job running threshold of ' + CONVERT(varchar(3), @threshold_minute) + ' minutes.'
				RAISERROR (@Msg, 16, 1) WITH LOG
				
				INSERT INTO #xp_results_new
				EXECUTE master.dbo.xp_sqlagent_enum_jobs @is_sysadmin, @job_owner, @job_id
				
				Failed:
				IF EXISTS (SELECT 1 FROM #xp_results_new)
				BEGIN
					INSERT INTO [msdb].[dbo].[jobs_stats] (job_id, job_name, job_status, current_step, current_retry_attempt, job_state, entry_date, msg)
					SELECT x.job_id,
					sj.name as job_name,
					CASE running
						WHEN 0 THEN 'Stopped'
						WHEN 1 THEN 'Running'
					END AS job_status,
					current_step ,
					current_retry_attempt ,
					CASE job_state
						WHEN 0 THEN 'Not idle or suspended'
						WHEN 1 THEN 'Executing'
						WHEN 2 THEN 'Waiting For Thread'
						WHEN 3 THEN 'Between Retries'
						WHEN 4 THEN 'Idle'
						WHEN 5 THEN 'Suspended'
						WHEN 6 THEN 'Waiting For Step To Finish'
						WHEN 7 THEN 'Performing Completion Actions'
					END AS job_state,
					GETDATE() AS entry_date,
					@Msg AS msg
					FROM #xp_results_new x
					JOIN msdb.dbo.sysjobs sj ON x.job_id=sj.job_id
					TRUNCATE TABLE #xp_results_new;
				/*IF @StopJob = 1
					BEGIN
						PRINT 'Stopping job [' + @Jobname + ']'
						EXEC msdb..sp_stop_job @job_name=@JobName
					END*/
				
				END
			END
     END
	
	
	 ELSE
	
	 
     FETCH NEXT FROM joblist_cursor
     INTO @JobName, @job_id, @JobStartTime, @JobRunningTime

END
 
CLOSE joblist_cursor
DEALLOCATE joblist_cursor
DROP TABLE #xp_results
DROP TABLE #xp_results_new
ASKER CERTIFIED SOLUTION
nishant joshi
Technology Development Consultant

Our community of experts have been thoroughly vetted for their expertise and industry experience.

Join our community to see this answer!
Unlock 2 Answers and 16 Comments.
Start Free Trial
Learn from the best

Network and collaborate with thousands of CTOs, CISOs, and IT Pros rooting for you and your success.

Andrew Hancock - VMware vExpert
See if this solution works for you by signing up for a 7 day free trial.
Unlock 2 Answers and 16 Comments.
Try for 7 days

”The time we save is the biggest benefit of E-E to our team. What could take multiple guys 2 hours or more each to find is accessed in around 15 minutes on Experts Exchange.

-Mike Kapnisakis, Warner Bros