Search code examples
mysqlimportgeoipgeolite2

Importing MaxMind's GeoLite2 to MySQL


MaxMind's GeoLite2 is a wonderful database and is very useful if you want to map IP addresses to countries.

To do this efficiently, I want to import it into a MySQL database with a scheme like this:

Model

I remember writing an import script for the CSV database long time ago, but the CSV as you can download it today has a very difficult to understand format, at least to me:

network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider 1.0.0.0/24,2077456,2077456,,0,0 1.0.1.0/24,1814991,1814991,,0,0 1.0.2.0/23,1814991,1814991,,0,0 1.0.4.0/22,2077456,2077456,,0,0 1.0.8.0/21,1814991,1814991,,0,0 1.0.16.0/20,1861060,1861060,,0,0 1.0.32.0/19,1814991,1814991,,0,0 1.0.64.0/18,1861060,1861060,,0,0 1.0.128.0/17,1605651,1605651,,0,0

I'm really stuck at the basics here. What is the most efficient and easiest way to import the database from its CSV representation into MySQL?


Solution

  • It really doesn't seem possible to do this with a simple SQL script, so I've written one in C#. And since importing MySQL databases that are so big is not that simple, I implemented a direct INSERT INTO into the script itself.

    A table structure like the one on the sketch in the question is required for it to work.

    using MySql.Data.MySqlClient;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    
    namespace GeoIPConvert
    {
        public static class Program
        {
            public static void Main(string[] args)
            {
                // https://dev.maxmind.com/geoip/geoip2/geolite2/
    
                List<Country> countries = File.ReadAllLines("Countries.csv")
                    .Select(line => line.Split(','))
                    .Where(line => line[4] != "" && line[5] != "")
                    .Select((line, index) => new Country
                    {
                        ID = Convert.ToInt32(line[0]),
                        DatabaseID = index + 1,
                        Flag = line[4].ToLower(),
                        Name = line[5].Replace("\"", "")
                    })
                    .ToList();
    
                List<IPRange> ipRanges = File.ReadAllLines("GeoIP.csv")
                    .Select(line => line.Split(','))
                    .Where(line => line[2] != "")
                    .Select(line => new IPRange
                    {
                        Country = countries.First(country => country.ID == Convert.ToInt32(line[2])),
                        From = ConvertCidrToRange(line[0]).Item1,
                        To = ConvertCidrToRange(line[0]).Item2,
                    })
                    .ToList();
    
                //string sql =
                //  "INSERT INTO geoip_countries(Flag, Name) VALUES\r\n" +
                //  string.Join(",\r\n", countries.Select(country => "(\"" + country.Flag + "\", \"" + country.Name + "\")").ToArray()) + "\r\n" +
                //  "INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES\r\n" +
                //  string.Join(",\r\n", ipRanges.Select(iprange => "(\"" + iprange.Country.DatabaseID + "\", \"" + iprange.From + "\", \"" + iprange.To + "\")").ToArray());
    
                //File.WriteAllText("Import.sql", sql);
    
                using (MySqlConnection sql = new MySqlConnection("Server=localhost;Database=test_db;Uid=root;"))
                {
                    sql.Open();
    
                    foreach (Country country in countries)
                    {
                        new MySqlCommand("INSERT INTO geoip_countries(Flag, Name) VALUES(\"" + country.Flag + "\", \"" + country.Name + "\")", sql).ExecuteNonQuery();
                    }
                    foreach (IPRange ipRange in ipRanges)
                    {
                        new MySqlCommand("INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES(\"" + ipRange.Country.DatabaseID + "\", \"" + ipRange.From + "\", \"" + ipRange.To + "\")", sql).ExecuteNonQuery();
                        Console.WriteLine(ipRange.To);
                    }
    
                    sql.Close();
                }
            }
    
            private static Tuple<uint, uint> ConvertCidrToRange(string cidr)
            {
                string[] parts = cidr.Split('.', '/');
                uint ipnum = Convert.ToUInt32(parts[0]) << 24 | Convert.ToUInt32(parts[1]) << 16 | Convert.ToUInt32(parts[2]) << 8 | Convert.ToUInt32(parts[3]);
                uint mask = uint.MaxValue << (32 - Convert.ToInt32(parts[4]));
                return Tuple.Create(ipnum & mask, ipnum | (mask ^ uint.MaxValue));
            }
        }
    
        public class Country
        {
            public int ID { get; set; }
            public int DatabaseID { get; set; }
            public string Flag { get; set; }
            public string Name { get; set; }
        }
    
        public class IPRange
        {
            public Country Country { get; set; }
            public uint From { get; set; }
            public uint To { get; set; }
        }
    }