Estimate KWG Completion

USE CASE:

CONTRAINDICATIONS:

QUERY:

SET NOCOUNT ON;

/*
Manually assign a number (or 0) to @current_num for percentage progress indication;
From a shell in the KWG/script directory, run: cat log-*.log | grep -P "kw\t" | wc -l
*/
declare @current_num bigint;
set @current_num = 0;

declare @lst table (template_id int, sql varchar(max), vnt float);
declare @math table (vnt int, cnt bigint)
declare @id varchar(max);
declare @sql varchar(max);
declare @vnt float;
declare @total bigint;
declare @avg float;

--Compile list of select statements and variant count per template
insert into @lst (template_id, sql, vnt)
select distinct
	t.template_id
	,'select count(*) from ' +
		(select case
			when te.opr1 like '%nolock%' then te.opr1
			when te.opr1 like '%where%' then replace(te.opr1,' where ',' with(nolock) where ')
			else te.opr1 + ' with(nolock)'
		end)
	,replace((select sum(a.cnt)
		from (
			select
				count(*) cnt
			from
				kwg.kw_variant kwv with(nolock)
			where
				coalesce(t.kw_explosion_single_variation,1) = 1
				and kwv.template_id = t.template_id
				and kwv.variant_group = 0
			union select
				count(*) cnt
			from
				kwg.kw_variant kwv with(nolock)
				join kwg.template_datafeed tdf with(nolock) on tdf.template_id = kwv.template_id
			where
				t.kw_explosion_single_variation = 0
				and kwv.template_id = t.template_id
				and kwv.variant_group = 0
			) a
		),0,1)
from
	template t with(nolock)
	join template_elements te with(nolock) on te.template_id = t.template_id
where
	te.op = 'indx'
	and t.is_active = 1
	and te.opr1 is not null
	and t.kw_explosion_order is not null
	and t.kw_explosion_single_variation is not null

	--Comment out the following line for a count of all KWG templates regardless of status
	--and t.kw_explosion_status_id != 2

--select * from @lst;

--Store the average of vnt to normalize the vnt multiplier since high vnt on certain templates inflates the result.
--Ideally this shouldn't be needed, but more work needs to be done to select more accurate counts in the first place.
set @avg = (select avg(vnt) from @lst);

--print @avg;

--Iterate through select statements
while (select count(*) from @lst) > 0
begin
	--Track and report current select statement
	set @id = (select top 1 cast(template_id as varchar) + ', ' + sql from @lst order by template_id, sql);
	print cast(getdate() as varchar) + ' executing for t' + @id;

	--Store select statement
	set @sql = (select top 1 sql from @lst where cast(template_id as varchar) + ', ' + sql = @id order by template_id, sql);

	--Execute select statement and store count from datasource
	insert into @math (cnt) exec (@sql);

	--Normalize vnt of current template for standard deviation greater than 2.
	set @vnt = (select vnt from @lst where cast(template_id as varchar) + ', ' + sql = @id);
	--print @vnt;

	set @vnt = (select case when @vnt/@avg > 2.0 then @avg + 3 else @vnt end);
	--print @vnt;

	--Match variant count with returned datsource count
	--and failsafe in case select statement fails to insert an integer
	update @math set cnt = coalesce(cnt,0) where cnt is null;
	update @math set vnt = coalesce(@vnt,0) where vnt is null;

	--Remove current select statement from list
	delete from @lst where cast(template_id as varchar) + ', ' + sql = @id;
end

--Multiply the variant count with the datasource count of each template and return the sum of products.
set @total = (select sum(vnt*cnt) from @math);

--The final output represents the approximate number of non-distinct keywords that could be generated.
--If @current_num was specifed at top, also report the keyword generation percentage, else 0%
select @total as Total, cast(round(((cast(@current_num as float)/@total)*100),2) as varchar) + '%' as [Pct Generated];

--select * from @math;

 

NOTES: