Earn the coveted Fabric Analytics Engineer certification. 100% off your exam for a limited time only!
Dear All,
Below is the table, which I have already sorted by shippable period in descending order. I want to remove duplicates based on the 'Project' column and keep only the row with the latest shippable period for each project, which should be FY23P02 in the example below.
However, when I use the Remove Duplicates function on the 'Project' column, the row that is kept has the shippable period FY22P09, which falls between the earlier period FY22P07 and the latest period FY23P02.
How do I remove the duplicate rows based on the 'Project' column and keep the unique row with the latest shippable period?
Solved! Go to Solution.
See the working example here: open a blank query, go to Home > Advanced Editor, remove everything there, and paste the code below to test. (Later, when you use the query on your own dataset, you will have to change the Source step appropriately. If you have columns other than these, delete the Changed Type step and redo Changed Type for the complete table from the UI.)
// Keeps, for every "Project Number", the single row with the latest "Shippable Period".
// "Custom" is a numeric key built from the digits of the period text (e.g. "FY23P02" -> 2302),
// so a larger key means a later period.
// NOTE(review): the question asks for de-duplication by 'Project', while this query groups by
// "Project Number"; presumably the two map one-to-one in the data — confirm.
let
    // Embedded sample data (Base64 text -> Deflate -> JSON rows); replace with your own source.
    Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WMjYxVdJRcgRit0hDywADCyArODMvPbEgvyhVKVaHVBVGxgEGRlRQYUh7FUYBhgTcQVAFMDwM8duCXYWhkbEJUNAJZg0Wp5KqBBQ1mI6lgRJQyJqMKomNBQA=", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#"Project Number" = _t, Project = _t, #"Shippable Period" = _t, #"Mgmt Region" = _t]),
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Project Number", Int64.Type}, {"Project", type text}, {"Shippable Period", type text}, {"Mgmt Region", type text}}),
    // Strip everything except digits from the period and convert the remainder to a number.
    #"Added Custom" = Table.AddColumn(#"Changed Type", "Custom", each Number.From(Text.Select([Shippable Period],{"0".."9"}))),
    // Pick the row with the largest key inside each group directly, instead of sorting first and
    // taking the row at index 0, so the result does not depend on a prior sort order surviving
    // the grouping step.
    #"Grouped Rows" = Table.Group(
        #"Added Custom",
        {"Project Number"},
        {{"Latest", each Table.Max(_, "Custom"), type [#"Project Number" = nullable number, Project = nullable text, #"Shippable Period" = nullable text, #"Mgmt Region" = nullable text, Custom = number]}}
    ),
    // Expand only the columns wanted in the output; the helper key "Custom" is left behind.
    #"Expanded Latest" = Table.ExpandRecordColumn(#"Grouped Rows", "Latest", {"Project", "Shippable Period", "Mgmt Region"}),
    // Present the result ordered by project name.
    #"Sorted Rows" = Table.Sort(#"Expanded Latest", {{"Project", Order.Ascending}})
in
    #"Sorted Rows"
Edit - Another, shorter approach is possible using an inner join:
// Inner-join variant: compute the largest period key per "Project Number", then keep only the
// rows of the original table whose (Project Number, key) pair matches that maximum.
let
    // Embedded sample data (Base64 text -> Deflate -> JSON rows); replace with your own source.
    Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WMjYxVdJRcgRit0hDywADCyArODMvPbEgvyhVKVaHVBVGxgEGRlRQYUh7FUYBhgTcQVAFMDwM8duCXYWhkbEJUNAJZg0Wp5KqBBQ1mI6lgRJQyJqMKomNBQA=", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#"Project Number" = _t, Project = _t, #"Shippable Period" = _t, #"Mgmt Region" = _t]),
    Typed = Table.TransformColumnTypes(
        Source,
        {
            {"Project Number", Int64.Type},
            {"Project", type text},
            {"Shippable Period", type text},
            {"Mgmt Region", type text}
        }
    ),
    // Numeric key made of the digits of the period text (e.g. "FY23P02" -> 2302).
    WithPeriodKey = Table.AddColumn(
        Typed,
        "Custom",
        each Number.From(Text.Select([Shippable Period], {"0".."9"}))
    ),
    // One row per project number holding its largest key in "Temp".
    LatestPerProject = Table.Group(
        WithPeriodKey,
        {"Project Number"},
        {{"Temp", each List.Max([Custom]), type number}}
    ),
    // The inner join acts as a filter: only rows carrying the maximum key survive.
    MatchedLatest = Table.NestedJoin(
        WithPeriodKey, {"Project Number", "Custom"},
        LatestPerProject, {"Project Number", "Temp"},
        "Grouped Rows",
        JoinKind.Inner
    ),
    // The nested join column was only needed for filtering.
    WithoutJoinColumn = Table.RemoveColumns(MatchedLatest, {"Grouped Rows"}),
    // Collapse rows that share the same project and key.
    Deduplicated = Table.Distinct(WithoutJoinColumn, {"Project", "Custom"}),
    // Drop the helper key from the final output.
    Result = Table.RemoveColumns(Deduplicated, {"Custom"})
in
    Result
See the working example here: open a blank query, go to Home > Advanced Editor, remove everything there, and paste the code below to test. (Later, when you use the query on your own dataset, you will have to change the Source step appropriately. If you have columns other than these, delete the Changed Type step and redo Changed Type for the complete table from the UI.)
// Keeps, for every "Project Number", the single row with the latest "Shippable Period".
// "Custom" is a numeric key built from the digits of the period text (e.g. "FY23P02" -> 2302),
// so a larger key means a later period.
// NOTE(review): the question asks for de-duplication by 'Project', while this query groups by
// "Project Number"; presumably the two map one-to-one in the data — confirm.
let
    // Embedded sample data (Base64 text -> Deflate -> JSON rows); replace with your own source.
    Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WMjYxVdJRcgRit0hDywADCyArODMvPbEgvyhVKVaHVBVGxgEGRlRQYUh7FUYBhgTcQVAFMDwM8duCXYWhkbEJUNAJZg0Wp5KqBBQ1mI6lgRJQyJqMKomNBQA=", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#"Project Number" = _t, Project = _t, #"Shippable Period" = _t, #"Mgmt Region" = _t]),
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Project Number", Int64.Type}, {"Project", type text}, {"Shippable Period", type text}, {"Mgmt Region", type text}}),
    // Strip everything except digits from the period and convert the remainder to a number.
    #"Added Custom" = Table.AddColumn(#"Changed Type", "Custom", each Number.From(Text.Select([Shippable Period],{"0".."9"}))),
    // Pick the row with the largest key inside each group directly, instead of sorting first and
    // taking the row at index 0, so the result does not depend on a prior sort order surviving
    // the grouping step.
    #"Grouped Rows" = Table.Group(
        #"Added Custom",
        {"Project Number"},
        {{"Latest", each Table.Max(_, "Custom"), type [#"Project Number" = nullable number, Project = nullable text, #"Shippable Period" = nullable text, #"Mgmt Region" = nullable text, Custom = number]}}
    ),
    // Expand only the columns wanted in the output; the helper key "Custom" is left behind.
    #"Expanded Latest" = Table.ExpandRecordColumn(#"Grouped Rows", "Latest", {"Project", "Shippable Period", "Mgmt Region"}),
    // Present the result ordered by project name.
    #"Sorted Rows" = Table.Sort(#"Expanded Latest", {{"Project", Order.Ascending}})
in
    #"Sorted Rows"
Edit - Another, shorter approach is possible using an inner join:
// Inner-join variant: compute the largest period key per "Project Number", then keep only the
// rows of the original table whose (Project Number, key) pair matches that maximum.
let
    // Embedded sample data (Base64 text -> Deflate -> JSON rows); replace with your own source.
    Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45WMjYxVdJRcgRit0hDywADCyArODMvPbEgvyhVKVaHVBVGxgEGRlRQYUh7FUYBhgTcQVAFMDwM8duCXYWhkbEJUNAJZg0Wp5KqBBQ1mI6lgRJQyJqMKomNBQA=", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [#"Project Number" = _t, Project = _t, #"Shippable Period" = _t, #"Mgmt Region" = _t]),
    Typed = Table.TransformColumnTypes(
        Source,
        {
            {"Project Number", Int64.Type},
            {"Project", type text},
            {"Shippable Period", type text},
            {"Mgmt Region", type text}
        }
    ),
    // Numeric key made of the digits of the period text (e.g. "FY23P02" -> 2302).
    WithPeriodKey = Table.AddColumn(
        Typed,
        "Custom",
        each Number.From(Text.Select([Shippable Period], {"0".."9"}))
    ),
    // One row per project number holding its largest key in "Temp".
    LatestPerProject = Table.Group(
        WithPeriodKey,
        {"Project Number"},
        {{"Temp", each List.Max([Custom]), type number}}
    ),
    // The inner join acts as a filter: only rows carrying the maximum key survive.
    MatchedLatest = Table.NestedJoin(
        WithPeriodKey, {"Project Number", "Custom"},
        LatestPerProject, {"Project Number", "Temp"},
        "Grouped Rows",
        JoinKind.Inner
    ),
    // The nested join column was only needed for filtering.
    WithoutJoinColumn = Table.RemoveColumns(MatchedLatest, {"Grouped Rows"}),
    // Collapse rows that share the same project and key.
    Deduplicated = Table.Distinct(WithoutJoinColumn, {"Project", "Custom"}),
    // Drop the helper key from the final output.
    Result = Table.RemoveColumns(Deduplicated, {"Custom"})
in
    Result