--****************************************************************** --****************************************************************** --****************************************************************** --****************************************************************** -- Algorithm by Carlos Klapp --****************************************************************** --****************************************************************** --****************************************************************** --****************************************************************** /* This algorithm splits the data into positive and negative values. It then finds the rows in the same position and flags them as a wash. If the row does not have a matching pair then it is a unique transaction and is NOT a wash. | Positive values | | Negative values | | AccountId | TransactionDate | Value | | AccountId | TransactionDate | Value | | 1 | 2016/01/01 | $7 | <-> | 1 | 2016/01/01 | -$7 | <- can be deleted | 1 | 2016/02/01 | $7 | <-> | 1 | 2016/06/01 | -$7 | <- can be deleted | 1 | 2016/04/01 | $7 | <-> | 1 | 2016/07/01 | -$7 | <- can be deleted | | | | | 1 | 2016/09/01 | -$7 | <- keep this row | 2 | 2016/05/01 | $8 | <-> | 2 | 2016/11/01 | -$8 | <- can be deleted | | | | | 2 | 2016/12/01 | -$8 | <- keep this row */ --****************************************************************** --****************************************************************** -- Build a tally table --****************************************************************** --****************************************************************** IF OBJECT_ID('dbo.Numbers') IS NOT NULL DROP TABLE dbo.Numbers SELECT TOP 10000 N=IDENTITY(INT, 1, 1) INTO dbo.Numbers FROM master.dbo.syscolumns a CROSS JOIN master.dbo.syscolumns b; ALTER TABLE dbo.Numbers ADD CONSTRAINT NBR_pk PRIMARY KEY(N); --****************************************************************** --****************************************************************** -- Create the table that will hold our test data --****************************************************************** --****************************************************************** IF OBJECT_ID('dbo.WashValues') IS NOT NULL DROP TABLE dbo.WashValues CREATE TABLE dbo.WashValues( RowId int IDENTITY(1,1) PRIMARY KEY ,AccountId INT NOT NULL ,TransactionDate datetime NOT NULL ,Value MONEY NOT NULL ,AbsValue AS CAST(ISNULL(abs(Value), 0) AS MONEY) PERSISTED ,IsWash BIT NOT NULL DEFAULT(0) ) --****************************************************************** --****************************************************************** -- Create our sample data --****************************************************************** --****************************************************************** INSERT INTO dbo.WashValues(AccountId, TransactionDate, Value) SELECT CAST(CRYPT_GEN_RANDOM(1) AS INT) % 10 + 1 AS AccountId ,DATEADD(DAY, n.N, GETDATE()) AS TransactionDate ,CAST(CRYPT_GEN_RANDOM(1) AS INT) % 10 * IIF(CAST(CRYPT_GEN_RANDOM(1) AS INT) % 2 = 0, 1, -1) AS Value FROM dbo.Numbers n WHERE n.N BETWEEN 1 AND 10000 --****************************************************************** -- Insert unique case: single positive row without a wash --****************************************************************** INSERT INTO dbo.WashValues(AccountId, TransactionDate, Value) SELECT 999 AS AccountId ,GETDATE() AS TransactionDate ,999 AS Value --****************************************************************** -- Insert unique case: single negative row without a wash --****************************************************************** INSERT INTO dbo.WashValues(AccountId, TransactionDate, Value) SELECT 888 AS AccountId ,GETDATE() AS TransactionDate ,-888 AS Value --****************************************************************** -- Delete the zero money rows. They sum to zero, so we can ignore them. --****************************************************************** DELETE from dbo.WashValues WHERE AbsValue = 0 --****************************************************************** --****************************************************************** --****************************************************************** --****************************************************************** -- Split the data into a positive and negative list. In this -- example we order the lists by the TransactionDate in ascending -- order. This essentially gives us a FIFO (First In First Out) -- list. --****************************************************************** --****************************************************************** --****************************************************************** --****************************************************************** --****************************************************************** -- Build negative list --****************************************************************** IF OBJECT_ID('dbo.WashValues_Negative') IS NOT NULL DROP TABLE dbo.WashValues_Negative SELECT RowId ,AccountId ,AbsValue ,ROW_NUMBER() OVER(PARTITION BY AccountId, Value ORDER BY TransactionDate) AS SequenceId INTO dbo.WashValues_Negative FROM dbo.WashValues WHERE Value < 0 --****************************************************************** -- Build negative list --****************************************************************** IF OBJECT_ID('dbo.WashValues_Positive') IS NOT NULL DROP TABLE dbo.WashValues_Positive SELECT RowId ,AccountId ,AbsValue ,ROW_NUMBER() OVER(PARTITION BY AccountId, Value ORDER BY TransactionDate) AS SequenceId INTO dbo.WashValues_Positive FROM dbo.WashValues WHERE Value > 0 --****************************************************************** --****************************************************************** -- Locate the equivalent rows and mark them as a wash. --****************************************************************** --****************************************************************** UPDATE w SET w.IsWash = 1 FROM dbo.WashValues w JOIN ( SELECT p.RowId AS n_RowId ,n.RowId AS p_RowId FROM dbo.WashValues_Positive p JOIN dbo.WashValues_Negative n ON p.AccountId = n.AccountId AND n.AbsValue = p.AbsValue AND n.SequenceId = p.SequenceId ) q ON w.RowId IN (q.n_RowId, q.p_RowId) --****************************************************************** --****************************************************************** -- Debug: Inspect the data and make sure the rows are -- correctly paired. --****************************************************************** --****************************************************************** SELECT pw.RowId ,pw.AccountId ,pw.TransactionDate ,pw.Value ,pw.AbsValue ,pw.IsWash ,p.SequenceId ,nw.RowId ,nw.AccountId ,nw.TransactionDate ,nw.Value ,nw.AbsValue ,nw.IsWash ,n.SequenceId FROM dbo.WashValues_Positive p JOIN dbo.WashValues_Negative n ON p.AccountId = n.AccountId AND n.AbsValue = p.AbsValue AND n.SequenceId = p.SequenceId JOIN dbo.WashValues pw ON pw.RowId = p.RowId JOIN dbo.WashValues nw ON nw.RowId = n.RowId GROUP BY pw.RowId ,pw.AccountId ,pw.TransactionDate ,pw.Value ,pw.AbsValue ,pw.IsWash ,p.SequenceId ,nw.RowId ,nw.AccountId ,nw.TransactionDate ,nw.Value ,nw.AbsValue ,nw.IsWash ,n.SequenceId ORDER BY pw.AccountId ,pw.AbsValue ,p.SequenceId --****************************************************************** --****************************************************************** -- Unit Test: The grand total by AccountId must always be the same. --****************************************************************** --****************************************************************** IF EXISTS( SELECT * FROM ( SELECT AccountId ,SUM(Value) AS Total ,COUNT(*) AS NumRows FROM dbo.WashValues WHERE IsWash = 0 GROUP BY AccountId ) q1 FULL OUTER JOIN ( SELECT AccountId ,SUM(Value) AS Total ,COUNT(*) AS NumRows FROM dbo.WashValues GROUP BY AccountId ) q2 ON q2.AccountId = q1.AccountId AND q2.Total = q1.Total where q1.AccountId IS NULL OR q2.AccountId IS null ) BEGIN RAISERROR('Bug in code. Total do not match', 18, 0) end
Thursday, March 3, 2016
Delete washes in accounting data
Subscribe to:
Posts (Atom)