Search code examples
pythonpython-3.xliststructure

How to Validate if a List Matches a Given Structural Pattern?


I am trying to write a Python function to validate whether a given list follows the same structure as a predefined "model" list, which in my particular case is:

model = [
    ["h", "P12", "P13"],
    ["P12", "P23", "eL"],
    ["P13", "P23", "eR"]
]

In words:

  • The fixed elements "h", "eL", and "eR" must appear in different sublists.
  • The remaining elements (placeholders) - "P12", "P13", and "P23" - are arbitrary elements depending on the given list to test. The order of the elements is irrelevant
  • Repetition of the elements "h", "eL", and "eR" in a sublist is allowed if it fits the model structure.
  • The string "P12" must be a common element between the list that contains the element "h" and the list that contains the string "eL"
  • The string "P13" must be a common element between the list that contains the element "h" and the list that contains the string "eR"
  • The string "P23" must be a common element between the list that contains the element "eL" and the list that contains the string "eR"

So, for example:

test_list_1 = [
    ["h", "a2", "a3"],
    ["a2", "a4", "eL"],
    ["a3", "a4", "eR"]
]

> True


test_list_2 = [
["h", "h", "Y"],
["h", "Z", "eL"],
["Y", "Z", "eR"]
]
    
> True


test_list_3 = [
["h", "X", "eL"],
["X", "Z", "eL"],
["eL", "Z", "eR"]
]
    
> True


test_list_4 = [
["h", "P1", "eL"],
["P1", "P3", "P2"], # ❌ this sublist should contain "eL" but does not
["P2", "P3", "eR"]
]
    
> False

I tried getting some help from ChatGPT, but the resulting code does not work properly, and so far I have not been able to fix it. This is my code so far, written with some help from AI:

from collections import defaultdict

def validate_structure(model, test_list):
    """Return True if ``test_list`` has the same structure as ``model``.

    The strings ``"h"``, ``"eL"`` and ``"eR"`` in ``model`` are fixed
    elements: the sublist of ``test_list`` at the same index must contain
    them literally.  Every other model entry is a placeholder that may
    stand for any value (including one that happens to equal a fixed
    element), as long as the same placeholder stands for the same value
    in every sublist it occurs in.  The order of the elements inside a
    sublist is irrelevant; sublists are compared as multisets.

    Args:
        model: List of sublists describing the expected pattern.
        test_list: List of sublists to check against ``model``.

    Returns:
        True if a consistent placeholder assignment exists, else False.
    """
    fixed_elements = {"h", "eL", "eR"}

    if len(model) != len(test_list):
        return False

    # leftovers[i]: items of test sublist i not yet explained by the model.
    leftovers = []
    # placeholder -> indices of the sublists it occurs in (an index is
    # repeated when the placeholder occurs several times in one sublist).
    occurrences = {}
    for index, (model_sublist, test_sublist) in enumerate(zip(model, test_list)):
        if len(model_sublist) != len(test_sublist):
            return False
        remaining = list(test_sublist)
        for item in model_sublist:
            if item in fixed_elements:
                if item not in remaining:
                    return False  # Required fixed element is missing
                # Consume a single occurrence only: further copies of a
                # fixed element may legitimately fill placeholder slots.
                remaining.remove(item)
            else:
                occurrences.setdefault(item, []).append(index)
        leftovers.append(remaining)

    placeholders = list(occurrences)

    def assign(position):
        """Try to bind placeholders[position:] to still-unexplained values."""
        if position == len(placeholders):
            # Sublist lengths matched and every slot consumed one item,
            # so nothing can be left over at this point.
            return True
        rows = occurrences[placeholders[position]]
        tried = []
        # Any valid value must be present in the first sublist that uses
        # this placeholder, so that sublist supplies all the candidates.
        for candidate in list(leftovers[rows[0]]):
            if candidate in tried:
                continue
            tried.append(candidate)
            if all(leftovers[row].count(candidate) >= rows.count(row)
                   for row in set(rows)):
                for row in rows:
                    leftovers[row].remove(candidate)
                if assign(position + 1):
                    return True
                # Undo the binding before trying the next candidate.
                for row in rows:
                    leftovers[row].append(candidate)
        return False

    return assign(0)

# Example usage:
print(validate_structure(model, test_list_2))  # Expected: True, but returns False

Solution

  • I used sets to check for membership, and only allowed the placeholder members (P12, P13 and P23) to be considered for a match.

    test_list_1 = [
        ["h", "a2", "a3"],
        ["a2", "a4", "eL"],
        ["a3", "a4", "eR"]
    ]
    
    test_list_2 = [
    ["h", "h", "Y"],
    ["h", "Z", "eL"],
    ["Y", "Z", "eR"]
    ]
    
    test_list_3 = [
    ["h", "X", "eL"],
    ["X", "Z", "eL"],
    ["eL", "Z", "eR"]
    ]
        
    test_list_4 = [
    ["h", "P1", "eL"],
    ["P1", "P3", "P2"], # ❌ this sublist should contain "eL" but does not
    ["P2", "P3", "eR"]
    ]
    
    # Expected False: the first and third sublists share no element, so
    # nothing can play the role of "P13".
    test_list_5 = [["h", "h", "Y"], ["h", "Z", "eL"], ["k", "Z", "eR"]]
        
    
    model = [
        ["h", "P12", "P13"],
        ["P12", "P23", "eL"],
        ["P13", "P23", "eR"]
    ]
    
    def validate(test_list, model):
        """Return True if ``test_list`` matches the structure of ``model``.

        ``"h"``, ``"eL"`` and ``"eR"`` are fixed values that must appear
        literally in the sublist with the same index as in ``model``.  All
        other model entries are placeholders: each one may stand for any
        value found in ``test_list`` (even one equal to a fixed value), but
        must stand for the same value wherever it is used.  Sublists are
        compared as multisets, so element order inside a sublist does not
        matter.

        Args:
            test_list: List of sublists (of hashable items) to check.
            model: List of sublists describing the expected pattern.

        Returns:
            True if some placeholder binding turns ``model`` into
            ``test_list``, otherwise False.
        """
        from collections import Counter
        from itertools import product

        fixed_vals = ("h", "eL", "eR")

        if len(test_list) != len(model):
            return False
        if any(len(test_row) != len(model_row)
               for test_row, model_row in zip(test_list, model)):
            return False

        # Placeholder names in order of first appearance in the model.
        placeholders = []
        for model_row in model:
            for name in model_row:
                if name not in fixed_vals and name not in placeholders:
                    placeholders.append(name)

        # A placeholder can only stand for a value present in every sublist
        # it is used in, which keeps the search below small.
        candidates = []
        for name in placeholders:
            row_sets = [set(test_row)
                        for test_row, model_row in zip(test_list, model)
                        if name in model_row]
            candidates.append(list(set.intersection(*row_sets)))

        wanted = [Counter(test_row) for test_row in test_list]

        # Pairwise overlap alone is not enough (one shared value could be
        # reused for every placeholder slot), so try each binding and
        # compare whole sublists, multiplicities included.
        for choice in product(*candidates):
            binding = dict(zip(placeholders, choice))
            if all(Counter(binding.get(name, name) for name in model_row) == target
                   for model_row, target in zip(model, wanted)):
                return True
        return False
    
    print(validate(test_list_1, model))
    print(validate(test_list_2, model))
    print(validate(test_list_3, model))
    print(validate(test_list_4, model))
    print(validate(test_list_5, model))
    

    Prints:

    True
    True
    True
    False
    False