Search code examples
swift coreml createml

How to Properly load JSON data from API endpoint as DataFrame


I am working on an ML model to predict Premier League teams' upcoming matches, and I'm doing it programmatically rather than through the Create ML app. I am having trouble loading the JSON data returned by the API endpoints and converting it into a DataFrame object that I can use to train the model and then predict the teams' games.

I am currently receiving the error "Unsupported JSON structure. Top-Level object must be sequence". However, if I query the same data from the command line and train on it in the Create ML app, it works.

If someone could provide some feedback on how to properly load the data from an API endpoint into a DataFrame, it would be much appreciated.

Code:

/// Shows match predictions for a team: a progress indicator while work is in
/// flight, then either the predicted column, an error message, or a placeholder.
struct MatchPredictionsView: View {
    /// `true` while `fetchPredictions()` is running; drives the progress indicator.
    @State var processing: Bool = false
    /// The prediction column returned by the classifier, once available.
    @State var predictions: AnyColumn?
    /// A user-facing description of the most recent failure, if any.
    @State var errorMessage: String?
    /// The team whose matches are fetched and predicted.
    var team: Team?

    var body: some View {
        VStack {
            if processing {
                ProgressView()
            } else if let predictions = predictions {
                // Display predictions
                Text("Predictions: \(predictions.description)")
            } else if let errorMessage = errorMessage {
                Text("Error: \(errorMessage)")
                    .foregroundColor(.red)
            } else {
                Text("No predictions available.")
            }
        }
        .task {
            await fetchPredictions()
        }
    }

    /// Runs the end-to-end prediction for `team` and publishes the outcome
    /// through `predictions` or `errorMessage`.
    func fetchPredictions() async {
        processing = true
        // Reset the flag on every exit path. Without this, the early return
        // for a missing team left the view stuck on the progress indicator
        // and the "Invalid team ID." message was never displayed.
        defer { processing = false }
        errorMessage = nil

        guard let teamIDString = team?.id.description else {
            errorMessage = "Invalid team ID."
            return
        }

        do {
            predictions = try await predictMatchesEndToEnd(teamID: teamIDString)
        } catch {
            errorMessage = "Error during match prediction: \(error)"
        }
    }

    /// Downloads training and prediction data for a team, trains a random
    /// forest classifier on the training data, and predicts the other set.
    ///
    /// - Parameter teamID: The team identifier interpolated into the API URLs.
    /// - Returns: The column of predictions produced by the trained model.
    /// - Throws: `URLError(.badURL)` if a URL cannot be built, plus any error
    ///   thrown while fetching, building the data frames, training, or predicting.
    func predictMatchesEndToEnd(teamID: String) async throws -> AnyColumn {
        // API Key and Header
        // NOTE(review): this credential is hard-coded in source; consider
        // loading it from configuration so it is not committed or shared.
        let apiKey = "f169854fa08340abbc130202705c60dc"
        let headers = ["X-Auth-Token": apiKey]

        // Create JSON URLs
        guard let trainingDataURL = URL(
                string: "https://api.football-data.org/v4/teams/\(teamID)/matches?dateFrom=2023-08-15&dateTo=2024-05-25"),
              let gamesToPredictURL = URL(
                string: "https://api.football-data.org/v4/teams/\(teamID)/matches?dateFrom=2024-08-15&dateTo=2025-05-25") else {
            throw URLError(.badURL)
        }

        // Fetch JSON data
        let gameTrainingDataJSON = try await fetchJSONData(from: trainingDataURL, headers: headers)
        let gamesToPredictJSON = try await fetchJSONData(from: gamesToPredictURL, headers: headers)

        // NOTE(review): the response body is handed to DataFrame(jsonData:)
        // unmodified. This is where the reported "Top-Level object must be
        // sequence" error would originate if the payload's top-level value is
        // not an array -- confirm against the API response.
        let gameTrainingData = try DataFrame(jsonData: gameTrainingDataJSON)
        let gamesToPredictDataFrame = try DataFrame(jsonData: gamesToPredictJSON)

        // Model Setup
        let featureColumns = [
            "homeTeam/name",
            "awayTeam/name",
            "score/fullTime/home",
            "score/fullTime/away",
            "score/halfTime/home",
            "score/halfTime/away",
            "referees/0/name",
            "season/winner/name"
        ]

        let parameters = MLRandomForestClassifier.ModelParameters(
            validation: .split(strategy: .automatic),
            maxIterations: 100,
            randomSeed: 38
        )

        let model = try MLRandomForestClassifier(
            trainingData: gameTrainingData,
            targetColumn: "score/winner",
            featureColumns: featureColumns,
            parameters: parameters
        )

        // Predict
        let predictions = try model.predictions(from: gamesToPredictDataFrame)

        return predictions
    }

    /// Performs a request for `url` with the given header fields and returns
    /// the raw response body.
    ///
    /// - Parameters:
    ///   - url: The endpoint to request.
    ///   - headers: Header field names mapped to the values to send.
    /// - Returns: The response body exactly as received.
    /// - Throws: `URLError(.badServerResponse)` unless the response is an HTTP
    ///   response with status code 200, plus any error thrown by the transfer.
    func fetchJSONData(from url: URL, headers: [String: String]) async throws -> Data {
        var request = URLRequest(url: url)
        for (key, value) in headers {
            request.setValue(value, forHTTPHeaderField: key)
        }

        let (data, response) = try await URLSession.shared.data(for: request)

        guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else {
            throw URLError(.badServerResponse)
        }

        return data
    }
}

Solution

  • As the error states, DataFrame(jsonData:) expects an array as the top-level object, but the received JSON is a dictionary. You must extract the matches array.

    This can be accomplished by decoding the JSON with JSONSerialization, getting the array for the key matches, and re-encoding that array as JSON.

    Replace fetchJSONData(from:headers:) with

    /// Requests `url` with the given header fields and returns only the
    /// array stored under the top-level `matches` key, re-encoded as JSON.
    ///
    /// - Parameters:
    ///   - url: The endpoint to request.
    ///   - headers: Header field names mapped to the values to send.
    /// - Returns: JSON data whose top-level value is the `matches` array.
    /// - Throws: `URLError(.badServerResponse)` unless the response is an HTTP
    ///   response with status code 200; `URLError(.cannotDecodeContentData)`
    ///   when the body is not a dictionary holding an array of dictionaries
    ///   under `matches`; plus any transfer or JSON serialization error.
    func fetchJSONData(from url: URL, headers: [String: String]) async throws -> Data {
        var request = URLRequest(url: url)
        for header in headers {
            request.setValue(header.value, forHTTPHeaderField: header.key)
        }

        let (payload, response) = try await URLSession.shared.data(for: request)

        // A non-HTTP response yields nil here and is rejected along with
        // every status code other than 200.
        let statusCode = (response as? HTTPURLResponse)?.statusCode
        guard statusCode == 200 else {
            throw URLError(.badServerResponse)
        }

        // Unwrap the enclosing dictionary so the caller receives a top-level array.
        let decoded = try JSONSerialization.jsonObject(with: payload)
        guard let root = decoded as? [String: Any],
              let matches = root["matches"] as? [[String: Any]] else {
            throw URLError(.cannotDecodeContentData)
        }

        return try JSONSerialization.data(withJSONObject: matches)
    }