I have created this script which will monitor long running jobs and alerts the analyst. One advantage of this custom script is to find threshold dynamically using job history!
CREATE PROCEDURE [dbo].[sp_dba_LongRunningJob_Alert]( @EMailAlertAddress varchar(256) = 'DEFAULT.MAIL@DOMAIN.COM', @ThreshholdFactor float = 1.75, @ThreshholdDeltaSec int = 300 ) /* --Description -- This procedure will validate long running SQL jobs and send a mail -- Input parameters -- @EMailAlertAddress - Mail Address -- @ThreshholdFactor - Multiply factor to average time taken to complete the job -- @ThreshholdDeltaSec int - Add this time to threshhold to avoid false alerts from jobs which runs for a short duration --Example -- exec sp_dba_LongRunningJob_Alert 'myalias@domain.com', 1.75, 300 -- Additional Comments -- Threshold is not fixed in this monitor. We are calculating the threshhold using job history and a factor. I execute this procedure once every hour in my environment to identify long running jobs -- Author -- Vijred - -- Created -- 2014-04-15 --Modifications -- None */ as declare @Subject as varchar(128) declare @msg as varchar(4096) declare @StartTime as datetime declare @UpTimeSec as bigint declare @UpTimeFormatted as varchar(128) set nocount on --Determine Server Uptime select @StartTime = login_time from master.sys.dm_exec_sessions where session_id = 1 set @UpTimeSec = datediff(ss, @StartTime, getdate()) set @UpTimeFormatted = cast ((@UpTimeSec / 86400) as varchar(8)) + ' day(s) ' + cast((@UpTimeSec % 86400 / 3600) as varchar(32)) + ' hour(s) ' + cast((@UpTimeSec % 3600 / 60) as varchar(32)) + ' min(s) ' + cast((@UpTimeSec % 60) as varchar(32)) + ' sec(s)' --Set all variables in the message set @msg = 'Server Name: ' + @@SERVERNAME + char(13) + char(13) + 'Server Uptime: ' + cast(@UpTimeFormatted as varchar) + char(13) + char(13) + '----------------------------------------------------------------' + char(13) + char(13) if exists (select * from tempdb.sys.objects where name = '##Jobs' and type in('U')) begin drop table ##Jobs end select sj.job_id, JobName = sj.name, StartDate = sja.start_execution_date, EndDate = sja.stop_execution_date, status = case when isnull(sjh.run_status, -1) = -1 and sja.start_execution_date is null and sja.stop_execution_date is null then 'Idle' when isnull(sjh.run_status, -1) = -1 and sja.start_execution_date is not null and sja.stop_execution_date is null then 'Running' when isnull(sjh.run_status, -1) = 0 then 'Failed' when isnull(sjh.run_status, -1) = 1 then 'Succeeded' when isnull(sjh.run_status, -1) = 2 then 'Retry' when isnull(sjh.run_status, -1) = 3 then 'Canceled' end, RunTime = datediff(SS, sja.start_execution_date, getdate()) into ##Jobs from msdb.dbo.sysjobs sj join msdb.dbo.sysjobactivity sja on sj.job_id = sja.job_id join (select MaxSessionid = max(session_id) from msdb.dbo.syssessions) ss on ss.MaxSessionid = sja.session_id left join msdb.dbo.sysjobhistory sjh on sjh.instance_id = sja.job_history_id if exists (select * from tempdb.sys.objects where name = '##Job_threshold' and type in('U')) begin drop table ##Job_threshold end select jh.job_id, j.name, count(*) as mycount, avg(run_duration)*@ThreshholdFactor+@ThreshholdDeltaSec as targetduration, max(run_duration) as maxduration --select jh.job_id, j.name, count(*) as mycount, (avg(run_duration)-avg(run_duration)) as targetduration, max(run_duration) as maxduration -- comment above line and uncomment this line to test into ##Job_threshold from msdb.dbo.sysjobhistory jh JOIN msdb.dbo.sysjobs j ON jh.job_id = j.job_id where jh.run_status = 1 and jh.step_id = 0 and j.enabled = 1 group by jh.job_id, j.name --select * from ##Job_threshold --select * from ##Jobs select * from ##Job_threshold jt join ##Jobs j on j.job_id = jt.job_id where j.status in ('Retry', 'Running') and j.RunTime > jt.targetduration if (@@ROWCOUNT > 0) begin set @Subject = 'Long Running Job(s) on ' + @@servername set @msg = @msg + 'The following jobs have been running for more than expected time ' + char(13) + char(13) select @msg = @msg + Name + ' ---> Running For ' + cast(RunTime / 60 as varchar) + ' Minutes --> Since ' + cast(StartDate as varchar) + char(13) from ##Job_threshold jt join ##Jobs j on j.job_id = jt.job_id where j.status in ('Retry', 'Running') and j.RunTime > jt.targetduration --Email the SQL Group a list of jobs running over nn hours exec msdb..sp_send_dbmail @recipients = @EMailAlertAddress, @body = @msg, @subject = @Subject end drop table ##Job_threshold drop table ##Jobs SET nocount off GO
– Vijred