Search code examples
java, html, parsing, jsoup

Append the XML/HTML tag only if it's outside of a particular tag. JAVA/JSOUP


There are two cases:

  1. If an <if> tag is not nested inside any <except> tag, wrap its content in a <print> tag: the opening <print> goes right after the opening <if>, and the closing </print> goes right before the matching </if>.

  2. If an <if> tag already has a <print> tag directly inside it, do not add another one.

The input XML is:

<if>
  <except>
    <if>
      <except>
        <if />
      </except>
    </if>
  </except>
</if>

And the expected output should be:

<if>
  <print>
    <except>
      <if>
        <except>
          <if />
        </except>
      </if>
    </except>
  </print>
</if>

What can I do to achieve this?


Solution

  • Explanation in comments:

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;
    import org.jsoup.parser.Parser;
    import org.jsoup.select.Elements;
    
    /**
     * Demo: for every {@code <if>} element that has no {@code <except>} ancestor, wraps its
     * child elements in a {@code <print>} element, unless a {@code <print>} child is already
     * present. The document is printed before and after the change.
     */
    public class StackOverflow58484337 {
    
        /**
         * Parses the sample markup, adds the missing {@code <print>} wrappers and prints the
         * document before and after the modification.
         *
         * @param args command-line arguments; not used
         */
        public static void main(String[] args) {
            String html = "<if><except><if><except><if /></except></if></except></if>";
            // parse with the XML parser (not the default HTML parser)
            Document doc = Jsoup.parse(html, "", Parser.xmlParser());
            // select every "if" element
            Elements ifs = doc.select("if");
            System.out.println("--- before:");
            System.out.println(doc);
            for (Element singleIf : ifs) {
                // Case 1: only "if" elements outside of any "except" get a "print".
                // Case 2: an "if" that already has a "print" child is left untouched,
                // so running the transformation twice does not nest "print" in "print".
                if (isOutsideExcept(singleIf) && !hasPrintChild(singleIf)) {
                    // NOTE(review): Elements.wrap is applied to each child element on its own,
                    // so an "if" with several children would get one "print" per child —
                    // confirm against the jsoup documentation if that matters for your input.
                    singleIf.children().wrap("<print>");
                }
            }
            System.out.println("--- after:");
            System.out.println(doc);
        }
    
        /**
         * Tells whether the given element has no {@code <except>} element among its ancestors.
         *
         * @param singleIf the element whose ancestors are inspected
         * @return {@code false} if any ancestor is named {@code except}, {@code true} otherwise
         */
        private static boolean isOutsideExcept(Element singleIf) {
            // check parent, and parent of the parent, and so on up to the root
            for (Element parent = singleIf.parent(); parent != null; parent = parent.parent()) {
                if ("except".equals(parent.tagName())) {
                    return false;
                }
            }
            return true;
        }
    
        /**
         * Tells whether the given element already has a direct child named {@code print}.
         *
         * @param singleIf the element whose direct child elements are inspected
         * @return {@code true} if at least one direct child element is named {@code print}
         */
        private static boolean hasPrintChild(Element singleIf) {
            for (Element child : singleIf.children()) {
                if ("print".equals(child.tagName())) {
                    return true;
                }
            }
            return false;
        }
    
    }
    

    output:

    --- before:
    <if>
     <except>
      <if>
       <except>
        <if />
       </except>
      </if>
     </except>
    </if>
    --- after:
    <if>
     <print>
      <except>
       <if>
        <except>
         <if />
        </except>
       </if>
      </except>
     </print>
    </if>