Search code examples
excelpowerbipowerquerypowerbi-desktopm

Extracting Text Between boundaries by applying logical parameters


First I think this is a complicated question to follow. Please see the Steps of my M code which I think will make it clearer.

So I am trying to achieve the following:

enter image description here

The idea is any text in the input box can be limited to relevant sections using the parameters table If the Text in the parameters box is Contained in the Text being searched. For me, at least the question is more complicated than it first appears. If required /interested Please see my explanation below:

Desired Output Fundamentally I want a way of filtering the Input text, using the parameters box such that each line returned is contained only within the relevant sections of start1-End1, Start2-End2.

Ideally, you can use any part of the text to set the limits. So I could say Everything between SECTION1-2, and between lines 2 and 5. will returns lines 2-5.

Or you could say Everything between SECTION 1-3, lines 4-8. Will return lines 4-8. Note you could even say SECTION1-3, Lines 4 -SECTION 4 which would return lines 4 up to 14.

Finally you could even overlap sections and These should still be captured separately and the lines where overlap occurs should repeat in the output.

M Code:

Parameters:

let
    Source = Excel.CurrentWorkbook(){[Name="Parameters"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Start1", type text}, {"End1", type text}, {"Start2", type text}, {"End2", type text}}),
    #"Filled Down" = Table.FillDown(#"Changed Type",{"Start1", "End1"}),
    #"Duplicated Column1" = Table.DuplicateColumn(#"Filled Down", "Start1", "Start1 - Copy"),
    #"Duplicated Column2" = Table.DuplicateColumn(#"Duplicated Column1", "End1", "End1 - Copy"),
    #"Duplicated Column3" = Table.DuplicateColumn(#"Duplicated Column2", "Start2", "Start2 - Copy"),
    #"Duplicated Column4" = Table.DuplicateColumn(#"Duplicated Column3", "End2", "End2 - Copy"),
    #"Added Custom" = Table.AddColumn(#"Duplicated Column4", "Custom", each "X"),
    #"Merged Columns" = Table.CombineColumns(#"Added Custom",{"Start1", "Custom"},Combiner.CombineTextByDelimiter("+++", QuoteStyle.None),"Start1"),
    #"Duplicated Column" = Table.DuplicateColumn(#"Merged Columns", "End1", "End1 - Copy.1"),
    #"Merged Columns3" = Table.CombineColumns(#"Duplicated Column",{"Start1", "End1"},Combiner.CombineTextByDelimiter(",", QuoteStyle.None),"Search1"),
    #"Added Custom1" = Table.AddColumn(#"Merged Columns3", "Custom", each "X"),
    #"Added Custom3" = Table.AddColumn(#"Added Custom1", "Custom.1", each "X"),
    #"Merged Columns1" = Table.CombineColumns(#"Added Custom3",{"Start2", "Custom"},Combiner.CombineTextByDelimiter("+++", QuoteStyle.None),"Start2"),
    #"Merged Columns4" = Table.CombineColumns(#"Merged Columns1",{"End2", "Custom.1"},Combiner.CombineTextByDelimiter("+++", QuoteStyle.None),"End2"),
    #"Merged Columns2" = Table.CombineColumns(#"Merged Columns4",{"Start2", "End2", "End1 - Copy.1"},Combiner.CombineTextByDelimiter(",", QuoteStyle.None),"Search2"),
    #"Added Custom2" = Table.AddColumn(#"Merged Columns2", "Custom", each Table.FromColumns({Text.Split([Search1], ","), Text.Split([Search2], ",")})),
    #"Removed Other Columns" = Table.SelectColumns(#"Added Custom2",{"Start1 - Copy", "End1 - Copy","Start2 - Copy","End2 - Copy","Custom"}),
    #"Expanded Custom" = Table.ExpandTableColumn(#"Removed Other Columns", "Custom", {"Column1", "Column2"}, {"Search1", "Search2"}),
    #"Split Column by Delimiter" = Table.SplitColumn(#"Expanded Custom", "Search1", Splitter.SplitTextByEachDelimiter({"+++"}, QuoteStyle.None, true), {"Search1", "Filter1"}),
    #"Split Column by Delimiter1" = Table.SplitColumn(#"Split Column by Delimiter", "Search2", Splitter.SplitTextByEachDelimiter({"+++"}, QuoteStyle.None, true), {"Search2", "Filter2"}),
    #"Changed Type1" = Table.TransformColumnTypes(#"Split Column by Delimiter1",{{"Search1", type text}, {"Filter1", type text}, {"Search2", type text}, {"Filter2", type text}}),
    #"Filled Down1" = Table.FillDown(#"Changed Type1",{"Search1"}),
    #"Sorted Rows" = Table.Sort(#"Filled Down1",{{"Filter1", Order.Ascending}}),
    #"Replaced Value" = Table.ReplaceValue(#"Sorted Rows",null,"",Replacer.ReplaceValue,{"Filter1", "Search2", "Filter2"})
in
    #"Replaced Value"

Text:

let
    Source = Excel.CurrentWorkbook(){[Name="TextToSearch"]}[Content],
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Text", type text}}),
    Search1 = Table.AddColumn(#"Changed Type", "Search1",  (x) => Table.SelectRows(Parameters, each Text.Contains(x[Text],[Search1], Comparer.OrdinalIgnoreCase))),
    #"Expanded Search1" = Table.ExpandTableColumn(Search1, "Search1", {"Search1", "Filter1"}, {"Search1", "Filter1"}),
    #"Filled Down" = Table.FillDown(#"Expanded Search1",{"Search1", "Filter1"}),
    #"Filtered Rows" = Table.SelectRows(#"Filled Down", each ([Filter1] = "X")),
    #"Removed Other Columns" = Table.SelectColumns(#"Filtered Rows",{"Text", "Search1"}),
    Search2 = Table.AddColumn(#"Removed Other Columns", "Search2",  (x) => Table.SelectRows(Parameters, each Text.Contains(x[Search1],[Search1], Comparer.OrdinalIgnoreCase) and Text.Contains(x[Text],[Search2], Comparer.OrdinalIgnoreCase))),
    #"Removed Other Columns1" = Table.SelectColumns(Search2,{"Text", "Search2"}),
    #"Expanded Search2" = Table.ExpandTableColumn(#"Removed Other Columns1", "Search2", {"Start1 - Copy", "End1 - Copy", "Start2 - Copy", "End2 - Copy", "Search2", "Filter2"}, {"Start1 - Copy", "End1 - Copy", "Start2 - Copy", "End2 - Copy", "Search2.1", "Filter2"}),
    #"Filled Down1" = Table.FillDown(#"Expanded Search2",{"Search2.1", "Filter2"}),
    #"Filtered Rows1" = Table.SelectRows(#"Filled Down1", each ([Filter2] = "X")),
    #"Removed Other Columns2" = Table.SelectColumns(#"Filtered Rows1",{"Start1 - Copy", "End1 - Copy", "Start2 - Copy", "End2 - Copy", "Text"}),
    #"Filled Down2" = Table.FillDown(#"Removed Other Columns2",{"Start1 - Copy", "End1 - Copy", "Start2 - Copy", "End2 - Copy", "Text"}),
    #"Renamed Columns" = Table.RenameColumns(#"Filled Down2",{{"Start1 - Copy", "Start1"}, {"End1 - Copy", "End1"}, {"Start2 - Copy", "Start2"}, {"End2 - Copy", "End2"}})
in
    #"Renamed Columns"

Real Example:

https://1drv.ms/x/s!AsrLaUgt0KCLvUgQQctfMtFe057l?e=AkbeP3


Solution

  • Here it is with partial matches.

    enter image description here

    enter image description here

    let
        Source = Table.FromRows(Json.Document(Binary.Decompress(Binary.FromText("i45W8kjNUdJRCnZ1DvH091MwArJzMvNSFQxhDBOlWJ1oJSAPJgBXYgqWQdYJYxvDTTGAswxRVJsiqTaFmwxXbWSkFBsLAA==", BinaryEncoding.Base64), Compression.Deflate)), let _t = ((type nullable text) meta [Serialized.Text = true]) in type table [Start1 = _t, End1 = _t, Start2 = _t, End2 = _t]),
        #"Changed Type" = Table.TransformColumnTypes(Source,{{"Start1", type text}, {"End1", type text}, {"Start2", type text}, {"End2", type text}}),
        #"Replaced Value" = Table.ReplaceValue(#"Changed Type","",null,Replacer.ReplaceValue,{"Start1", "End1"}),
        #"Filled Down" = Table.FillDown(#"Replaced Value",{"Start1", "End1"}),
        #"Added Custom" = Table.AddColumn(#"Filled Down", "Custom", each let 
    input = Input[Text],
    s1 = List.PositionOf(input, List.FindText(input,[Start1]){0}),   
    e1 = List.PositionOf(input, List.FindText(input,[End1]){0}),
    r1 = if s1=e1 then  List.Range(input,s1) else List.Range(input,s1,e1-s1+1),
    s2 = List.PositionOf(r1, List.FindText(input,[Start2]){0}),
    e2 = List.PositionOf(r1, List.FindText(input,[End2]){0}),
    r2 = List.Range(r1,s2,e2-s2+1)
    
    in r2),
        #"Expanded Custom" = Table.ExpandListColumn(#"Added Custom", "Custom")
    in
        #"Expanded Custom"