Search code examples
javacss-parsing

Get list of CSS rules that apply to a specific HTML class in Java


I'm using the CSS Parser to get the specific CSS rules that apply to a given HTML class. At the moment I have a list of all CSS rules in the site; however, I cannot figure out how to get the rules I'm looking for.

Current Code:

// Describe where the stylesheet comes from.
// NOTE(review): "example.com" has no scheme and looks like a placeholder — confirm the real
// stylesheet address before running this.
InputSource inputSource = new InputSource("example.com");
// Build the object-model parser on top of the CSS3 SAC parser.
CSSOMParser parser = new CSSOMParser(new SACParserCSS3());

// Register an error handler on the parser before parsing starts.
ErrorHandler errorHandler = new CSSErrorHandler();
parser.setErrorHandler(errorHandler);

// Parse the input into a stylesheet object; the two remaining arguments are passed as null.
CSSStyleSheet sheet = parser.parseStyleSheet(inputSource, null, null);

// The rule list of the parsed sheet; this is what the rest of the code iterates over.
CSSRuleList rules = sheet.getCssRules();

One of my options would be doing a for loop, but I'm reluctant to do this because
a. It will be slow if there are hundreds of rules in a page.
b. There appears to be no method to get the class name of a rule.

Any help would be appreciated


Solution

  • Re a: The CSS has already been parsed by your code, so you only have to look at the selectors, which should be acceptable in terms of performance.

    Re b: The CSSStyleRule interface lacks a method getSelectors() but CSSStyleRuleImpl has it. So you could try something along these lines:

    scanRules(rules, name -> name.contains("e"), 
        (names, rule) -> System.out.println(
                new TreeSet<>(names) + " --> " + rule.getCssText()));
    

    with recursive helper methods

    // Walks a rule list and reports style rules whose selectors reference a matching class.
    // Media rules are entered recursively so the rules nested inside them are scanned too;
    // every other non-style rule type is skipped.
    //
    // rules         - the rule list to walk
    // classNameTest - decides which class names are of interest
    // ruleAction    - invoked once per selector that contains at least one matching class
    //                 name, with those names and the rule owning the selector
    void scanRules(CSSRuleList rules, Predicate<String> classNameTest,
            BiConsumer<Set<String>, CSSStyleRule> ruleAction) {
        for (int ruleIndex = 0; ruleIndex < rules.getLength(); ruleIndex++) {
            CSSRule current = rules.item(ruleIndex);
            short ruleType = current.getType();

            if (ruleType == CSSRule.MEDIA_RULE) {
                CSSRuleList nested = ((CSSMediaRule) current).getCssRules();
                scanRules(nested, classNameTest, ruleAction);
                continue;
            }
            if (ruleType != CSSRule.STYLE_RULE) {
                continue;
            }

            // getSelectors() is only available on the implementation class, hence the cast.
            CSSStyleRuleImpl styleRuleImpl = (CSSStyleRuleImpl) current;
            SelectorList selectorList = styleRuleImpl.getSelectors();
            // Possible speed-up: skip the rule early when styleRuleImpl.getSelectorText()
            // does not contain the ".name" being looked for.
            for (int selectorIndex = 0; selectorIndex < selectorList.getLength(); selectorIndex++) {
                Set<String> matched =
                        classNamesInSelectorMatching(selectorList.item(selectorIndex), classNameTest);
                if (matched.isEmpty()) {
                    continue;
                }
                ruleAction.accept(matched, styleRuleImpl);
            }
        }
    }
    
    // Finds the class names in the given (potentially complex) selector that satisfy
    // nameMatches. Combinator selectors are walked recursively on both sides, negative
    // selectors are unwrapped, and conditional selectors are delegated to
    // classNamesInConditionMatching. Any other selector type yields an empty set.
    //
    // selector    - the selector to inspect
    // nameMatches - test applied to every class name found
    // returns     - the matching class names; empty when there are none
    Set<String> classNamesInSelectorMatching(Selector selector,
            Predicate<String> nameMatches) {
        switch (selector.getSelectorType()) {
            case Selector.SAC_CHILD_SELECTOR:
            case Selector.SAC_DESCENDANT_SELECTOR: {
                DescendantSelector ds = (DescendantSelector) selector;
                Set<String> set = new HashSet<>();
                set.addAll(classNamesInSelectorMatching(ds.getAncestorSelector(), nameMatches));
                set.addAll(classNamesInSelectorMatching(ds.getSimpleSelector(), nameMatches));
                return set;
            }
            case Selector.SAC_DIRECT_ADJACENT_SELECTOR: {
                // SAC represents "a + b" as a SiblingSelector, not a DescendantSelector;
                // casting it to DescendantSelector fails with a ClassCastException.
                // Fully qualified so the fix does not depend on an extra import.
                org.w3c.css.sac.SiblingSelector ss = (org.w3c.css.sac.SiblingSelector) selector;
                Set<String> set = new HashSet<>();
                set.addAll(classNamesInSelectorMatching(ss.getSelector(), nameMatches));
                set.addAll(classNamesInSelectorMatching(ss.getSiblingSelector(), nameMatches));
                return set;
            }
            case Selector.SAC_NEGATIVE_SELECTOR: {
                NegativeSelector ns = (NegativeSelector) selector;
                return classNamesInSelectorMatching(ns.getSimpleSelector(), nameMatches);
            }
            case Selector.SAC_CONDITIONAL_SELECTOR: {
                ConditionalSelector cs = (ConditionalSelector) selector;
                return classNamesInConditionMatching(cs.getCondition(), nameMatches);
            }
            default:
                // Element, pseudo-element and other simple selectors carry no class names.
                return Collections.emptySet();
        }
    }
    
    // Collects the class names referenced by a (potentially complex) condition that are
    // accepted by nameMatches. Class conditions contribute their own value, and/or
    // conditions contribute the union of both operands, negative conditions are unwrapped,
    // and every other condition type contributes nothing.
    //
    // condition   - the condition to inspect
    // nameMatches - test applied to every class name found
    // returns     - the matching class names; empty when there are none
    Set<String> classNamesInConditionMatching(Condition condition,
            Predicate<String> nameMatches) {
        final short kind = condition.getConditionType();

        if (kind == Condition.SAC_CLASS_CONDITION) {
            String className = ((AttributeCondition) condition).getValue();
            return nameMatches.test(className)
                    ? Collections.singleton(className)
                    : Collections.emptySet();
        }

        if (kind == Condition.SAC_AND_CONDITION || kind == Condition.SAC_OR_CONDITION) {
            CombinatorCondition pair = (CombinatorCondition) condition;
            Set<String> found = new HashSet<>();
            found.addAll(classNamesInConditionMatching(pair.getFirstCondition(), nameMatches));
            found.addAll(classNamesInConditionMatching(pair.getSecondCondition(), nameMatches));
            return found;
        }

        if (kind == Condition.SAC_NEGATIVE_CONDITION) {
            Condition inner = ((NegativeCondition) condition).getCondition();
            return classNamesInConditionMatching(inner, nameMatches);
        }

        return Collections.emptySet();
    }
    

    I have tried it with input https://www.w3.org/2008/site/css/minimum-src.css and it seems to work for me.