Search code examples
java · regex · jsoup

Java - Obtain text within script tag


How can I get the value https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35 from this script? The first link value, https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886, just does not work.

    // Sample page markup: a single <script> element whose JavaScript defines
    // getcookie(), constructs a Playerjs instance, and defines PlayerjsEvents().
    // The Playerjs options object carries a "file" entry that lists, per quality
    // label ([SD (480p)], [HD (720р)]), two URLs separated by " or "; the quality
    // groups themselves are separated by a comma.
    // NOTE(review): the embedded JS passes `en` to substring() although the
    // variable assigned above it is `end` — looks like a typo in the page's own
    // script; it is sample data here and is intentionally left untouched.
    String html    ="<script>function getcookie(Name){\n" +
                    "  var search=Name+\"=\";\n" +
                    "  if(document.cookie.length>0){\n" +
                    "    offset=document.cookie.indexOf(search)\n" +
                    "    if(offset!=-1){\n" +
                    "      offset+=search.length\n" +
                    "      end=document.cookie.indexOf(\";\",offset)\n" +
                    "      if(end==-1){end=document.cookie.length}\n" +
                    "      return unescape(document.cookie.substring(offset, en))\n" +
                    "    }\n" +
                    "  }\n" +
                    "}\n" +
                    "   var player = new Playerjs({\n" +
                    "  \"id\":\"player\",\n" +
                    "  \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
                    "  \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
                    "  \"default_quality\":\"SD (480p)\"\n" +
                    "  });\n" +
                    "  function PlayerjsEvents(event,id,data){\n" +
                    "   if(event==\"start\"){\n" +
                    "      var xhttp = new XMLHttpRequest();\n" +
                    "  xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
                    "  xhttp.send();\n" +
                    "   }\n" +
                    "   if(event==\"end\"){\n" +
                    "   \n" +
                    "   }\n" +
                    "   }</script>";


    // Captures, in group 1, the fallback URL that follows " or " in the first
    // quality entry of the player's "file" option, whose value has the shape
    //   "file":"[label]primaryUrl or fallbackUrl,[label]primaryUrl or fallbackUrl"
    // The captured URL ends at the first whitespace, comma, or double quote.
    // A pattern without a capturing group cannot be used here: Matcher.group(1)
    // throws IndexOutOfBoundsException when the pattern defines no group 1.
    Pattern p = Pattern.compile(
            "\"file\"\\s*:\\s*\"\\[[^\\]]*\\]\\S+\\s+or\\s+([^\\s,\"]+)");
    String url = "";

    // NOTE(review): `script` is not defined in this snippet; presumably it is the
    // jsoup selection of the page's <script> elements — confirm against the caller.
    for (Element element : script) {
        // data() is matched rather than text(): the payload is script source.
        Matcher m = p.matcher(element.data());
        if (m.find()) {
            url = m.group(1);
        }
    }
    // Prints the captured URL, or an empty line when no script contained the entry.
    System.out.println(url);

I don’t really understand how Java regex patterns work. I have tried many times to extract this URL, but I have failed. I would be grateful if someone could help with this, or at least point me to a beginner's guide to Java regex. Thanks.


Solution

  • It seems to me you are trying to extract URLs from JavaScript code using Java. There are many regex playgrounds on the web where you can experiment with patterns before putting them in your code.

    For your specific case :

    import java.util.*;
    import java.util.regex.*;  
    import java.net.*;
    /**
     * Demo: finds every URL embedded in a block of JavaScript source and prints
     * each one on its own line.
     */
    public class HelloWorld {

        /**
         * Matches an {@code http}, {@code https} or {@code file} URL: the scheme and
         * a colon, one or more {@code //} or backslash separators, then a run of
         * URL characters.
         *
         * <p>The hyphen is placed last in the character class so that it is a
         * literal. Written between {@code +} and {@code =} it forms a range that
         * also admits {@code ,} and {@code <}, which glues the comma separating two
         * quality entries onto the end of the preceding URL.
         */
        private static final Pattern URL_PATTERN = Pattern.compile(
                "(?:https?|file):(?://|\\\\)+[\\w:#@%/;$()~?+=\\\\.&-]*");

        /**
         * Runs the extraction over a sample script and prints the URLs found.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            String jsCode    ="<script>function getcookie(Name){\n" +
                        "  var search=Name+\"=\";\n" +
                        "  if(document.cookie.length>0){\n" +
                        "    offset=document.cookie.indexOf(search)\n" +
                        "    if(offset!=-1){\n" +
                        "      offset+=search.length\n" +
                        "      end=document.cookie.indexOf(\";\",offset)\n" +
                        "      if(end==-1){end=document.cookie.length}\n" +
                        "      return unescape(document.cookie.substring(offset, en))\n" +
                        "    }\n" +
                        "  }\n" +
                        "}\n" +
                        "   var player = new Playerjs({\n" +
                        "  \"id\":\"player\",\n" +
                        "  \"poster\":\"https://media.example.com/img/2147414277.jpg\",\n" +
                        "  \"file\":\"[SD (480p)]https://example.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886 or https://example1.com/2147414277.mp4?md5=OFvyZ55egHb4A5hUZJvSEQ&time=1580513886&ip=176.9.117.35,[HD (720р)]https://example2.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886 or https://exampl3.com/720/2147414277.mp4?md5=GXD4cKVnM5RVY363Uxn9ww&time=1580513886&ip=190.9.117.35\",\n" +
                        "  \"default_quality\":\"SD (480p)\"\n" +
                        "  });\n" +
                        "  function PlayerjsEvents(event,id,data){\n" +
                        "   if(event==\"start\"){\n" +
                        "      var xhttp = new XMLHttpRequest();\n" +
                        "  xhttp.open(\"GET\", \"stat.php?id=2147414277\", true);\n" +
                        "  xhttp.send();\n" +
                        "   }\n" +
                        "   if(event==\"end\"){\n" +
                        "   \n" +
                        "   }\n" +
                        "   }</script>";

            for (String url : extractUrls(jsCode)) {
                System.out.println(url);
            }
        }

        /**
         * Collects every substring of {@code text} that matches {@link #URL_PATTERN}.
         *
         * @param text the text to scan
         * @return the matched URLs in order of appearance; empty if there are none
         */
        private static List<String> extractUrls(String text) {
            List<String> urls = new ArrayList<>();
            Matcher m = URL_PATTERN.matcher(text);
            while (m.find()) {
                urls.add(m.group());
            }
            return urls;
        }
    }
    

    Running it prints each URL found in the script on its own line.

    Hope this helps!