Finding Missing Indexes Script

Hi Experts,

I ran the script below and attached are the results, perhaps someone can help me read and identify the missing indexes in our DB.

Thanks
USE [<your_db_name_here>] --naturally make sure you are in the right db

SET DEADLOCK_PRIORITY LOW --probably irrelevant, but just in case

DECLARE @list_missing_indexes bit
DECLARE @table_name sysname

--NOTE: showing missing indexes can take some time; set to 0 if you don't want to wait.
SET @list_missing_indexes = 1
SET @table_name = '%' --<<-- !your table name / LIKE pattern goes here!
--SET @table_name = '%' --all tables

SELECT * FROM sys.dm_db_index_physical_stats ( DB_ID(), OBJECT_ID(@table_name), NULL, NULL, 'LIMITED')

PRINT 'Started @ ' + CONVERT(varchar(30), GETDATE(), 120)

IF @list_missing_indexes = 1
BEGIN
    SELECT
        GETDATE() AS capture_date,
        DB_NAME(mid.database_id) AS Db_Name,
        dps.row_count,
        OBJECT_NAME(mid.object_id /*, mid.database_id*/) AS Table_Name,
        mid.equality_columns, mid.inequality_columns, mid.included_columns,
        user_seeks, user_scans, ca1.max_days_active, unique_compiles, 
        last_user_seek, last_user_scan, avg_total_user_cost, avg_user_impact,
        system_seeks, system_scans, last_system_seek, last_system_scan, avg_total_system_cost, avg_system_impact,
        mid.statement, mid.object_id, mid.index_handle
    FROM sys.dm_db_missing_index_details mid WITH (NOLOCK)
    CROSS APPLY (
        SELECT DATEDIFF(DAY, create_date, GETDATE()) AS max_days_active FROM sys.databases WHERE name = 'tempdb'
    ) AS ca1
    LEFT OUTER JOIN sys.dm_db_missing_index_groups mig WITH (NOLOCK) ON
        mig.index_handle = mid.index_handle
    LEFT OUTER JOIN sys.dm_db_missing_index_group_stats migs WITH (NOLOCK) ON
        migs.group_handle = mig.index_group_handle
    LEFT OUTER JOIN sys.dm_db_partition_stats dps WITH (NOLOCK) ON
        dps.object_id = mid.object_id AND
        dps.index_id IN (0, 1)
    --order by
        --DB_NAME, Table_Name, equality_columns
    WHERE
        1 = 1 
        AND mid.database_id = DB_ID() --only current db
        AND OBJECT_NAME(mid.object_id) LIKE @table_name
        --AND mid.object_id IN (OBJECT_ID('<table_name_1>'), OBJECT_ID('<table_name_2>'))
    ORDER BY
        --avg_total_user_cost * (user_seeks + user_scans) DESC,
        Db_Name, Table_Name, equality_columns, inequality_columns
END --IF

PRINT 'Midpoint @ ' + CONVERT(varchar(30), GETDATE(), 120)
-- list index usage stats (seeks, scans, etc.)
SELECT 
    ius2.row_num, DB_NAME() AS db_name,
    CASE WHEN i.name LIKE ca2.table_name + '%' 
         THEN '~' + SUBSTRING(i.name, LEN(ca2.table_name) + 1, 200)
         ELSE i.name END AS index_name,
    CASE WHEN i.is_unique = 0 THEN 'N' ELSE 'Y' END + '.' + 
    CASE WHEN i.is_primary_key = 0 AND i.is_unique_constraint = 0 THEN 'N' ELSE 'Y' END AS [uniq?],
    ca2.table_name,
    i.index_id, --ius.user_seeks + ius.user_scans AS total_reads,
    dps.row_count,
    SUBSTRING(key_cols, 3, 8000) AS key_cols, SUBSTRING(nonkey_cols, 3, 8000) AS nonkey_cols,
    ius.user_seeks, ius.user_scans, ius.user_lookups, ius.user_updates,
    ius.last_user_seek, ius.last_user_scan, ius.last_user_lookup, ius.last_user_update,
    fk.Reference_Count AS fk_ref_count,
    DATEDIFF(DAY, CASE WHEN o.create_date > ca1.sql_startup_date THEN o.create_date 
        ELSE ca1.sql_startup_date END, GETDATE()) AS max_days_active,
    FILEGROUP_NAME(i.data_space_id) AS filegroup_name,
    ius.system_seeks, ius.system_scans, ius.system_lookups, ius.system_updates,
    ius.last_system_seek, ius.last_system_scan, ius.last_system_lookup, ius.last_system_update
FROM sys.indexes i WITH (NOLOCK)
INNER JOIN sys.objects o WITH (NOLOCK) ON
    o.object_id = i.object_id
CROSS JOIN (
    SELECT create_date AS sql_startup_date FROM sys.databases WHERE name = 'tempdb'
) AS ca1
CROSS APPLY (
    SELECT OBJECT_NAME(i.object_id/*, DB_ID()*/) AS table_name
) AS ca2
OUTER APPLY (
    SELECT
        ', ' + COL_NAME(object_id, ic.column_id)
    FROM sys.index_columns ic
    WHERE
        ic.key_ordinal > 0 AND
        ic.object_id = i.object_id AND
        ic.index_id = i.index_id
    ORDER BY
        ic.key_ordinal
    FOR XML PATH('')
) AS key_cols (key_cols)
OUTER APPLY (
    SELECT
        ', ' + COL_NAME(object_id, ic.column_id)
    FROM sys.index_columns ic
    WHERE
        ic.key_ordinal = 0 AND
        ic.object_id = i.object_id AND
        ic.index_id = i.index_id
    ORDER BY
        COL_NAME(object_id, ic.column_id)
    FOR XML PATH('')
) AS nonkey_cols (nonkey_cols)
LEFT OUTER JOIN sys.dm_db_partition_stats dps WITH (NOLOCK) ON
    dps.object_id = i.object_id AND
    dps.index_id = i.index_id
LEFT OUTER JOIN sys.dm_db_index_usage_stats ius WITH (NOLOCK) ON
    ius.database_id = DB_ID() AND
    ius.object_id = i.object_id AND
    ius.index_id = i.index_id
LEFT OUTER JOIN (
    SELECT
        database_id, object_id, MAX(user_scans) AS user_scans,
        ROW_NUMBER() OVER (ORDER BY MAX(user_scans) DESC) AS row_num --user_scans|user_seeks+user_scans
    FROM sys.dm_db_index_usage_stats WITH (NOLOCK)
    WHERE
        database_id = DB_ID()
        --AND index_id > 0
    GROUP BY
        database_id, object_id
) AS ius2 ON
    ius2.database_id = DB_ID() AND
    ius2.object_id = i.object_id
LEFT OUTER JOIN (
    SELECT
        referenced_object_id, COUNT(*) AS Reference_Count
    FROM sys.foreign_keys
    WHERE
        is_disabled = 0
    GROUP BY
        referenced_object_id
) AS fk ON
    fk.referenced_object_id = i.object_id
WHERE
    i.object_id > 100 AND
    i.is_hypothetical = 0 AND
    i.type IN (0, 1, 2) AND
    o.type NOT IN ( 'IF', 'IT', 'TF', 'TT' ) AND
    (
     o.name LIKE @table_name  AND
     o.name NOT LIKE 'dtprop%' AND
     o.name NOT LIKE 'filestream[_]' AND
     o.name NOT LIKE 'MSpeer%' AND
     o.name NOT LIKE 'MSpub%' AND
     --o.name NOT LIKE 'queue[_]%' AND 
     o.name NOT LIKE 'sys%' 
    )
    --AND OBJECT_NAME(i.object_id /*, DB_ID()*/) IN ('tbl1', 'tbl2', 'tbl3')
ORDER BY
    --row_count DESC,
    --ius.user_scans DESC,
    --ius2.row_num, --user_scans&|user_seeks
    db_name, table_name, 
    -- list clustered index first, if any, then other index(es)
    CASE WHEN i.index_id IN (0, 1) THEN 1 ELSE 2 END, 
    key_cols

PRINT 'Ended @ ' + CONVERT(varchar(30), GETDATE(), 120)

SET DEADLOCK_PRIORITY NORMAL

Open in new window

First.txt
Second.txt
LVL 5
bfuchsAsked:
Who is Participating?
I wear a lot of hats...

"The solutions and answers provided on Experts Exchange have been extremely helpful to me over the last few years. I wear a lot of hats - Developer, Database Administrator, Help Desk, etc., so I know a lot of things but not a lot about one thing. Experts Exchange gives me answers from people who do know a lot about one thing, in a easy to use platform." -Todd S.

ste5anSenior DeveloperCommented:
The first query shows you which columns are used in which fashion. Equality columns are used for looking values by equality (=), inequality columns are used for comparisons other then (=).

Included columns are the columns which must be used to create a covering index.

But you should order the first result by user_seeks DESC.

btw, you should also correlate this with you top expensive queries:

SELECT TOP 100
        GETDATE() AS snapshot_datetime ,
        RANK() OVER ( ORDER BY total_logical_reads + total_logical_writes DESC, sql_handle, statement_start_offset ) AS row_no ,
        creation_time ,
        last_execution_time ,
        total_worker_time / 1000 AS total_worker_time ,
        total_logical_reads ,
        total_logical_writes ,
        execution_count ,
        total_logical_reads + total_logical_writes AS total_logical ,
        CASE WHEN sql_handle IS NULL THEN ' '
             ELSE ( SUBSTRING(st.text, ( qs.statement_start_offset + 2 ) / 2,
                              ( CASE WHEN qs.statement_end_offset = -1 THEN LEN(CONVERT(NVARCHAR(MAX), st.text)) * 2
                                     ELSE qs.statement_end_offset
                                END - qs.statement_start_offset ) / 2) )
        END AS query_text ,
        st.text AS sql_text ,
        DB_NAME(st.dbid) AS database_name ,
        st.objectid AS object_id ,
        qp.query_plan ,
        cp.cacheobjtype ,
        cp.objtype ,
        qs.plan_handle
FROM    sys.dm_exec_query_stats qs
        INNER JOIN sys.dm_exec_cached_plans cp ON cp.plan_handle = qs.plan_handle
        CROSS APPLY sys.dm_exec_sql_text(sql_handle) st
        CROSS APPLY sys.dm_exec_query_plan(qs.plan_handle) qp
WHERE   total_logical_reads + total_logical_writes > 0
ORDER BY total_logical DESC;

Open in new window


It makes more sense to start with optimizing those queries.
0
bfuchsAuthor Commented:
@Ste5an,
Few questions
a-When creating an index do I need to include all columns listed in both equality and inequality?
b-I assume not everything that comes up there is really necessary to be created, what is the rule, so let say if ordering by user_seeks desc as you suggested, up till which # is it considered crucial/important?
c-what if more then one index is being suggested for same field, just included columns are different, do I need to create 2 indexes, or just create one and have all included columns as part of the covering index?
d-same question as previous, just with the index columns itself, if one suggestion is for Column ID and the other is for column ID + Name, do I create one or two indexes?
e-what do you mean by optimizing queries, redesign them or create indexed views?
f-what about the other columns there like user_scans avg_user_impact etc do I have to take those in consideration too?
g- what is the meaning of second query and is there a way to figure out also what indexes can be removed?

I tried running your SQL above and got the following message
"Msg 321, Level 15, State 1, Line 26
"sql_handle" is not a recognized table hints option. If it is intended as a parameter to a table-valued function or to the CHANGETABLE function, ensure that your database compatibility mode is set to 90."

Currently this database is set compatibility mode 80, how can I get this working?

Thanks,
Ben
0
ste5anSenior DeveloperCommented:
a) it depends on the queries and how much space you want to use for indices. But normally: Yes. Cause the columns from equality, inequality and include columns should be a covering index. Thus the queries need only to use this index and not further action like lookups.

b) you need to balance read performance vs. write  performance vs. space needed. I'm normally working on one of the top IO queries per week. Especially on queries which are directly related to user interaction.

c) While optimizing I'm creating often different variations of the indices. Then I examine the actual execution plan. I'm also testing index hints to use the one or the other. And when I'm lazy, I'm evaluating the index usage after some hours to decide which index to keep (should not be done on systems under load).

d) Both queries could benefit from both indices. But the performance would not be optimal for both, when you only have one. Here it depends on the cardinality. I guess in most cases it is sufficient to have the second index.

e) The main work is to examine the existing indices and to decide where additional indices are necessary. Rewriting is a part of the process. Often changing the query structure is necessary to get the maximum out of SQL Server. This also means to change ad-hoc queries into views or stored procedure or even functions to get more stable execution plans.

f) Those columns tell you how the index would be used. For seeks or scans. And how often it would be used. Thus the higher the numbers are the more likely an index will impact the performance significantly.

g) none, that I' ve recognized at the first glance.


What SQL Server version do you use? The script should work on 2008R2+. COMPATIBILITY_LEVEL 80 is SQL Server 2000. This is pretty outdated...
0
Ultimate Tool Kit for Technology Solution Provider

Broken down into practical pointers and step-by-step instructions, the IT Service Excellence Tool Kit delivers expert advice for technology solution providers. Get your free copy now.

bfuchsAuthor Commented:
Hi ste5an,

The server was recently upgraded from sql 2005 to SQL 2008, however I guess (I was not involved in the setup) the DB has this backwards compatibility since it was meant to be linked to Access 2000 app,

In the pc I use for analysis, I don't see R2 applied there, maybe this is what I needed in order to execute your script?

If I don't get that to be working, is there any other way I can analyze the database, perhaps  the second query above can help us identify those high IO queries?

I used to look at the Activity Monitor / Recent expensive queries, select estimated execution plan and see what indexes are they suggesting, in case I saw something with a high % impact I applied, does this makes sense? (Actually before applying it I did look into if this query is something users frequently use..)

One more script I used to analyze with is the following,
-- Missing Index Script
-- Original Author: Pinal Dave (C) 2011
SELECT --TOP 25
dm_mid.database_id AS DatabaseID,
dm_migs.avg_user_impact*(dm_migs.user_seeks+dm_migs.user_scans) Avg_Estimated_Impact,
dm_migs.last_user_seek AS Last_User_Seek,
OBJECT_NAME(dm_mid.OBJECT_ID,dm_mid.database_id) AS [TableName],
'CREATE INDEX [IX_' + OBJECT_NAME(dm_mid.OBJECT_ID,dm_mid.database_id) + '_'
+ REPLACE(REPLACE(REPLACE(ISNULL(dm_mid.equality_columns,''),', ','_'),'[',''),']','') +
CASE
WHEN dm_mid.equality_columns IS NOT NULL AND dm_mid.inequality_columns IS NOT NULL THEN '_'
ELSE ''
END
+ REPLACE(REPLACE(REPLACE(ISNULL(dm_mid.inequality_columns,''),', ','_'),'[',''),']','')
+ ']'
+ ' ON ' + dm_mid.statement
+ ' (' + ISNULL (dm_mid.equality_columns,'')
+ CASE WHEN dm_mid.equality_columns IS NOT NULL AND dm_mid.inequality_columns IS NOT NULL THEN ',' ELSE
'' END
+ ISNULL (dm_mid.inequality_columns, '')
+ ')'
+ ISNULL (' INCLUDE (' + dm_mid.included_columns + ')', '') AS Create_Statement
FROM sys.dm_db_missing_index_groups dm_mig
INNER JOIN sys.dm_db_missing_index_group_stats dm_migs
ON dm_migs.group_handle = dm_mig.index_group_handle
INNER JOIN sys.dm_db_missing_index_details dm_mid
ON dm_mig.index_handle = dm_mid.index_handle
WHERE dm_mid.database_ID = DB_ID()
ORDER BY Avg_Estimated_Impact DESC
GO

Open in new window

And I applied the higher numbers returned by that SQL for Avg_Estimated_Impact, wondering what is your opinion regarding that?

Thanks,
Ben
0
ste5anSenior DeveloperCommented:
My query does the same as the activity monitor. btw, there is an table alias missing and sql_handle is a reserverd word. It should work:

SELECT TOP 100
        GETDATE() AS snapshot_datetime ,
        RANK() OVER ( ORDER BY total_logical_reads + total_logical_writes DESC, sql_handle, statement_start_offset ) AS row_no ,
        creation_time ,
        last_execution_time ,
        total_worker_time / 1000 AS total_worker_time ,
        total_logical_reads ,
        total_logical_writes ,
        execution_count ,
        total_logical_reads + total_logical_writes AS total_logical ,
        CASE WHEN sql_handle IS NULL THEN ' '
             ELSE ( SUBSTRING(st.text, ( qs.statement_start_offset + 2 ) / 2,
                              ( CASE WHEN qs.statement_end_offset = -1 THEN LEN(CONVERT(NVARCHAR(MAX), st.text)) * 2
                                     ELSE qs.statement_end_offset
                                END - qs.statement_start_offset ) / 2) )
        END AS query_text ,
        st.text AS sql_text ,
        DB_NAME(st.dbid) AS database_name ,
        st.objectid AS object_id ,
        qp.query_plan ,
        cp.cacheobjtype ,
        cp.objtype ,
        qs.plan_handle
FROM    sys.dm_exec_query_stats qs
        INNER JOIN sys.dm_exec_cached_plans cp ON cp.plan_handle = qs.plan_handle
        CROSS APPLY sys.dm_exec_sql_text(qs.[sql_handle]) st
        CROSS APPLY sys.dm_exec_query_plan(qs.plan_handle) qp
WHERE   total_logical_reads + total_logical_writes > 0
ORDER BY total_logical DESC;

Open in new window

0

Experts Exchange Solution brought to you by

Your issues matter to us.

Facing a tech roadblock? Get the help and guidance you need from experienced professionals who care. Ask your question anytime, anywhere, with no hassle.

Start your 7-day free trial
bfuchsAuthor Commented:
Hi Ste5an,

OK Finally I got it to work on the master DB, the other db was giving me an error.
It took about 13 min and see attached what I got.

I also clicked on query plan and saw few recommendations, should I apply all of them, or at perhaps those that have a big % impact, like >70?
Ste5an.txt
0
bfuchsAuthor Commented:
Hi Ste5AN,
I will not be in office the next few days, I might still have some question until I find myself confident enough to apply those indexes (hope u dont mind..:)
Thanks You!
0
It's more than this solution.Get answers and train to solve all your tech problems - anytime, anywhere.Try it for free Edge Out The Competitionfor your dream job with proven skills and certifications.Get started today Stand Outas the employee with proven skills.Start learning today for free Move Your Career Forwardwith certification training in the latest technologies.Start your trial today
Microsoft SQL Server

From novice to tech pro — start learning today.

Question has a verified solution.

Are you are experiencing a similar issue? Get a personalized answer when you ask a related question.

Have a better answer? Share it in a comment.