Search code examples
ios, string, swift3, nsarray, nsdictionary

How to separate an HTML string into an array or dictionary in Swift 3?


I get HTML strings from an API like this:

// Sample HTML fragments as returned by the API. The inner double quotes are escaped
// so that the string literals compile.
let a: String = "<a href=\"https://www.google.com.tw\">https://www.google.com.tw </a>"
let b: String = "<a href=\"myAppName://app/user/aa3b77411825b88b318d77gg\">@Tim </a>Hello Tim"
let c: String = "<a href=\"myAppName://app/user/aa3b77411825b88b318d77gg\">@Tim </a><a href=\"https://www.google.com.tw\">https://www.google.com.tw </a>"

// "?????" is a placeholder: which separator (or other approach) splits these strings best?
// Swift 3 spells this API `components(separatedBy:)`.
let splitedArray1: [String] = a.components(separatedBy: "?????")
let splitedArray2: [String] = b.components(separatedBy: "?????")
let splitedArray3: [String] = c.components(separatedBy: "?????")

I want to separate the links from them and get data like the following:

// Desired result for each input; the trailing comment on each line is the array the asker wants to obtain.
print(splitedArray1) //["https://www.google.com.tw","https://www.google.com.tw"]
print(splitedArray2) //["myAppName://app/user/aa3b77411825b88b318d77gg","@Tim ","Hello Tim"]
print(splitedArray3) //["myAppName://app/user/aa3b77411825b88b318d77gg","@Tim ","https://www.google.com.tw","https://www.google.com.tw "]

Solution

  • Possible solution: parse the HTML into an NSAttributedString, then enumerate the link attribute (NSLinkAttributeName). If a range has no link attribute, there was no link tag there, so you just keep the string; otherwise, you add the link first, then the string.

    Quickly written in Playground:

    // Sample HTML fragments to split into link targets and visible text.
    let a: String = "<a href=\"https://www.google.com.tw\">https://www.google.com.tw </a>"
    let b: String = "<a href=\"myAppName://app/user/aa3b77411825b88b318d77gg\">@Tim </a>Hello Tim"
    let c: String = "<a href=\"myAppName://app/user/aa3b77411825b88b318d77gg\">@Tim </a><a href=\"https://www.google.com.tw\">https://www.google.com.tw </a>"

    let values: [String] = [a, b, c]

    /// Splits an HTML fragment into its link targets and visible text runs.
    ///
    /// The fragment is imported as an HTML document into an `NSAttributedString`, which is then
    /// walked run by run over the `.link` attribute. A linked run contributes its target
    /// followed by its visible text; an unlinked run contributes only its text.
    ///
    /// - Parameter html: The HTML fragment to parse.
    /// - Returns: Link targets and text runs, in document order.
    /// - Throws: Any error raised by `NSAttributedString` while importing the HTML.
    /// - Note: Apple documents that the HTML importer should not be called from a background thread.
    func linkComponents(fromHTML html: String) throws -> [String]
    {
        let attributedString = try NSAttributedString(
            data: Data(html.utf8),
            options: [
                .documentType: NSAttributedString.DocumentType.html,
                // State the encoding explicitly; otherwise the importer may not decode
                // the bytes as UTF-8 and non-ASCII text comes out garbled.
                .characterEncoding: String.Encoding.utf8.rawValue
            ],
            documentAttributes: nil)

        // NSRange is measured in UTF-16 code units, so use `length` rather than
        // `string.count` (which counts Characters and is shorter for emoji and
        // composed sequences, truncating the enumeration).
        let fullRange = NSRange(location: 0, length: attributedString.length)

        var components = [String]()
        attributedString.enumerateAttribute(.link, in: fullRange, options: []) { value, range, _ in
            // The link value is normally a URL; accept a plain string as well.
            if let url = value as? URL
            {
                components.append(url.absoluteString)
            }
            else if let target = value as? String
            {
                components.append(target)
            }
            components.append(attributedString.attributedSubstring(from: range).string)
        }
        return components
    }

    for aHTMLString in values
    {
        do
        {
            let retValues = try linkComponents(fromHTML: aHTMLString)
            print("*** retValues: \(retValues)")
        }
        catch
        {
            // Report the failing input instead of crashing the whole playground.
            print("*** failed to parse \(aHTMLString): \(error)")
        }
    }

    // The results requested in the question, printed for side-by-side comparison.
    let targetResult1 = ["https://www.google.com.tw","https://www.google.com.tw"]
    let targetResult2 = ["myAppName://app/user/aa3b77411825b88b318d77gg","@Tim ","Hello Tim"]
    let targetResult3 = ["myAppName://app/user/aa3b77411825b88b318d77gg","@Tim ","https://www.google.com.tw","https://www.google.com.tw "]
    print("targetResult1: \(targetResult1)")
    print("targetResult2: \(targetResult2)")
    print("targetResult3: \(targetResult3)")
    

    Output:

    *** retValues: ["https://www.google.com.tw/", "https://www.google.com.tw "]
    *** retValues: ["myappname://app/user/aa3b77411825b88b318d77gg", "@Tim ", "Hello Tim"]
    *** retValues: ["myappname://app/user/aa3b77411825b88b318d77gg", "@Tim ", "https://www.google.com.tw/", "https://www.google.com.tw "]
    targetResult1: ["https://www.google.com.tw", "https://www.google.com.tw"]
    targetResult2: ["myAppName://app/user/aa3b77411825b88b318d77gg", "@Tim ", "Hello Tim"]
    targetResult3: ["myAppName://app/user/aa3b77411825b88b318d77gg", "@Tim ", "https://www.google.com.tw", "https://www.google.com.tw "]
    

    There are small differences: I copied your "target" values (splitedArray), and one of them is missing a trailing space. My code also tends to add a final "/" to links and, as the output shows, the custom URL scheme comes back lowercased.