Search code examples
rubydatabasesqlitesequel

Why aren'y my associations created properly in Ruby/Sequel?


I am trying to build a database that has two principal tables build from csv input and link them together, but the associations are seemingly not being made – that is querying Language to get the family they belong returns nil every time. I am either missing how Sequel works, or how should associations be construed in general.

My input data is:

language_families_csv

level family_code parent_code meta_comment
1 INE Indo-European
2 BAT INE Baltic
3 wes-BAT BAT Western Baltic
3 eas-BAT BAT Eastern Baltic
2 HYX INE Armenian
2 CEL INE Celtic
2 SLA INE Slavic
3 ZLE SLA East Slavic
3 ZLS SLA South Slavic
4 eas-ZLS ZLS Eastern South Slavic
4 wes-ZLS ZLS Western South Slavic
3 ZLW SLA West
4 leh-ZLW ZLW Lechitic
4 cze-ZLW ZLW Czech-Slovak
4 WEN ZLW Sorbian

languages_csv

language_code parent_family meta_comment
ENG GMW English
FRA ROA French
GER GMW German
SPA ROA Spanish
POL leh-ZLW Polish
CES cze-ZLW Czech
RUS eas-ZLS Russian
UKR eas-ZLS Ukrainian
BEL eas-ZLS Belarusian
CSB leh-ZLW Kashubian
POX leh-ZLW Polabian
SLK cze-ZLW Slovak

My schema:

module Language_to_LanguageFamily
    def self.included(base)
        base.one_to_many :languages, key: :parent_family, class: :Language
        base.many_to_one :parent_family, class: :LanguageFamily, key: :family_code, primary_key: :family_code
    end
end  


### Language_Families ###
MyDB.create_table :Language_Families do
    primary_key :language_family_id
    Integer :level, index: true, null: false
    String :family_code, size: 7, index: true, null: false, unique: true
    String :meta_comment, text: true
    
    # with a self-referential association to the primary key
    foreign_key :parent_code, :Language_Families, key: :language_family_id
end

class LanguageFamily < Sequel::Model
    plugin :timestamps, update_on_create: true, force: true
    plugin :json_serializer
    plugin :validation_helpers

    # self-referrential association
    one_to_many :children, class: :LanguageFamily, key: :parent_code
    many_to_one :parent, class: :LanguageFamily, key: :parent_code

    # external associations
    include Language_to_LanguageFamily
end

### Languages ###
MyDB.create_table :Languages do
    primary_key :language_id
    String :language_code, size: 7, index: true, null: false, unique: true
    String :meta_comment, text: true

    # ↓ optional association
    foreign_key :parent_family, :Language_Families, key: :family_code, null: true, default: nil
end

class Language < Sequel::Model
    plugin :timestamps, update_on_create: true, force: true
    plugin :json_serializer
    plugin :validation_helpers
    
    # external associations
    include Language_to_LanguageFamily

    # ↓ allows accessing foreign keys
    attr_accessor :parent_family
end

and my code:

require 'csv'
require 'sequel'

MyDB = Sequel.connect adapter: :sqlite, database: 'MirDB.sqlite'

MyDB.drop_table(*db.tables)

# Create an empty hash to store the id of each language family code
code_id = {}

# insert data into Language_Families DB
language_families_csv.each do |family|
    # Create a new LanguageFamily object
    language_family = LanguageFamily.new
    language_family.level = family["level"].to_i
    language_family.family_code = family["family_code"]
    language_family.meta_comment = family["meta_comment"]
    if parent_code = family["parent_code"]
        language_family.parent = LanguageFamily.find(parent_code: parent_code)
    end
    language_family.save
    code_id[language_family.family_code] = language_family.id
end

# insert data into Language DB
languages_csv.each do |lang_row|
    # Create a new Language object
    language = Language.new
    language.language_code = lang_row["language_code"]
    language.meta_comment = lang_row["meta_comment"]
    
    # set the association to the corresponding language family if present in languages
    if lang_row["parent_family"]    # ← if in languages
        lang_parent_family = LanguageFamily.first(family_code: lang_row["parent_family"])
        if lang_parent_family       # ← if such family exists
            language.parent_family = lang_parent_family
            # puts "#{language.parent_family} ↔ #{lang_parent_family.family_code}"
        else
            puts 'Family '.red + lang_parent_family.family_code.cyan + ' doesn’t exist!'.red
        end
    end
    
    language.save
end

Finally, when I run puts Language.all.to_s, all I get is:

[
    #<Language @values={:language_id=>1, :language_code=>"ENG", :meta_comment=>"English", :parent_family=>nil}>,
    #<Language @values={:language_id=>2, :language_code=>"FRA", :meta_comment=>"French", :parent_family=>nil}>,
    #<Language @values={:language_id=>3, :language_code=>"GER", :meta_comment=>"German", :parent_family=>nil}>,
    #<Language @values={:language_id=>4, :language_code=>"SPA", :meta_comment=>"Spanish", :parent_family=>nil}>,
    #<Language @values={:language_id=>5, :language_code=>"POL", :meta_comment=>"Polish", :parent_family=>nil}>,
    #<Language @values={:language_id=>6, :language_code=>"CES", :meta_comment=>"Czech", :parent_family=>nil}>,
    #<Language @values={:language_id=>7, :language_code=>"RUS", :meta_comment=>"Russian", :parent_family=>nil}>,
    #<Language @values={:language_id=>8, :language_code=>"UKR", :meta_comment=>"Ukrainian", :parent_family=>nil}>,
    #<Language @values={:language_id=>9, :language_code=>"BEL", :meta_comment=>"Belarusian", :parent_family=>nil}>,
    #<Language @values={:language_id=>10, :language_code=>"CSB", :meta_comment=>"Kashubian", :parent_family=>nil}>,
    #<Language @values={:language_id=>11, :language_code=>"POX", :meta_comment=>"Polabian", :parent_family=>nil}>,
    #<Language @values={:language_id=>12, :language_code=>"SLK", :meta_comment=>"Slovak", :parent_family=>nil}>,
    (…)
]

Solution

  • I've uncomplicated your set up a bit. You have to make sure your code works, before you start extracting modules, it makes it that much harder to figure out what's going on. Also this is my first time using Sequel.

    require "sequel"
    require "csv"
    
    db = Sequel.connect("sqlite://language.db")
    db.drop_table(*db.tables)
    
    #     language_families             languages
    #     .---------------.        .---------------.
    # .-->| code       pk |<---.   | code       pk |
    # |   | name          |    |   | name          |
    # |   | level         |    `---| family_id  fk |
    # `---| parent_id  fk |        `---------------`
    #     `---------------`
    
    db.create_table :language_families do
      String  :code, primary_key: true
      Integer :level
      String  :name      # `meta_comment` sure looks like `name` to me
      String  :parent_id # primary_key is a string so this is a string
    end
    
    db.create_table :languages do
      String :code, primary_key: true
      String :name
      String :family_id
    end
    
    # you should avoid unnecessary prefixes, if you can:
    # `family_code` in LanguageFamily, just `code` is sufficient
    # `language_code` in Language, just `code` is sufficient
    # `parent_family` in Language, just `family` is sufficient
    # otherwise everything will be `parent_something` and `something_code`
    
    # so far, i've learned this much:                     ActiveRecord equivalent:
    # one_to_many                                         has_many
    #   key:         column from the other table            foreign_key
    #   primary_key: column from the current table          primary_key
    #
    # many_to_one                                         belongs_to
    #   key:         column from the current table          foreign_key
    #   primary_key: column from the other table            primary_key 
    
    # I don't see how `Language_to_LanguageFamily` can work for both models 
    
    class LanguageFamily < Sequel::Model
      unrestrict_primary_key # because we need to assign primary key - `code`
    
      many_to_one :parent,   class: :LanguageFamily
      one_to_many :children, class: :LanguageFamily, key: :parent_id
    
      one_to_many :languages, key: :family_id
    
      # making a full tree structure is more involved than parent/children
      # relationships, this is just for a quick set up
      def descendants
        [self, children.map(&:descendants)].flatten
      end
    
      def descendant_languages
        Language.where(family_id: descendants.map(&:code)).all
      end
    end
    
    class Language < Sequel::Model
      unrestrict_primary_key
    
      many_to_one :family, class: :LanguageFamily
    
      # from included module, i'm not sure how language has more languages, but if
      # you mean other languages from the same family
      def sibling_languages
        return [] unless family 
        family.languages.reject { |lang| lang.code == self.code }
      end
    end
    
    puts "==> Loading language families"
    CSV.table("language_families.csv").each do |row|
      language_family = LanguageFamily.new({
        code:   row[:family_code],
        level:  row[:level],
        name:   row[:meta_comment],
        # this will set `parent_id`
        parent: LanguageFamily.find(code: row[:parent_code])
      })
    
      p language_family.save
    end
    
    puts "==> Loading languages"
    CSV.table("languages.csv").each do |row|
      language = Language.new({
        code:   row[:language_code],
        name:   row[:meta_comment],
        # this will set `family_id`
        family: LanguageFamily.find(code: row[:parent_family])
      })
    
      # # if you want to do this as a separate step
      # if (family = LanguageFamily.find(code: row[:parent_family]))
      #   language.family = family
      # else
      #   puts "Family #{row[:parent_family]} doesn't exist!"
      # end
    
      p language.save
    end
    

    Test:

    >> LanguageFamily.last.parent
    => #<LanguageFamily @values={:code=>"ZLS", :level=>3, :name=>"South Slavic", :parent_id=>"SLA"}>
    
    >> LanguageFamily.find(code: "leh-ZLW").languages
    => [#<Language @values={:code=>"POL", :name=>"Polish", :family_id=>"leh-ZLW"}>,
        #<Language @values={:code=>"CSB", :name=>"Kashubian", :family_id=>"leh-ZLW"}>,
        #<Language @values={:code=>"POX", :name=>"Polabian", :family_id=>"leh-ZLW"}>]
    
    >> LanguageFamily.find(code: "ZLW").languages
    => [] # no languages in this family, but plenty in child families
    >> LanguageFamily.find(code: "ZLW").descendant_languages
    => [#<Language @values={:code=>"POL", :name=>"Polish", :family_id=>"leh-ZLW"}>,
        #<Language @values={:code=>"CES", :name=>"Czech", :family_id=>"cze-ZLW"}>,
        #<Language @values={:code=>"CSB", :name=>"Kashubian", :family_id=>"leh-ZLW"}>,
        #<Language @values={:code=>"POX", :name=>"Polabian", :family_id=>"leh-ZLW"}>,
        #<Language @values={:code=>"SLK", :name=>"Slovak", :family_id=>"cze-ZLW"}>]
    #                                i guess you can also do "LIKE '%-ZLW'" ^
    
    >> LanguageFamily.find(code: "ZLW").parent.parent.descendants.count
    => 15
    
    >> Language.find(code: "POL").sibling_languages
    => [#<Language @values={:code=>"CSB", :name=>"Kashubian", :family_id=>"leh-ZLW"}>, 
        #<Language @values={:code=>"POX", :name=>"Polabian", :family_id=>"leh-ZLW"}>]
    

    Import test:

    >> load "app.rb"
    ==> Loading language families
    #<LanguageFamily @values={:code=>"INE", :level=>1, :name=>"Indo-European", :parent_id=>nil}>
    #<LanguageFamily @values={:code=>"BAT", :level=>2, :name=>"Baltic", :parent_id=>"INE"}>
    #<LanguageFamily @values={:code=>"wes-BAT", :level=>3, :name=>"Western Baltic", :parent_id=>"BAT"}>
    #<LanguageFamily @values={:code=>"eas-BAT", :level=>3, :name=>"Eastern Baltic", :parent_id=>"BAT"}>
    #<LanguageFamily @values={:code=>"HYX", :level=>2, :name=>"Armenian", :parent_id=>"INE"}>
    #<LanguageFamily @values={:code=>"CEL", :level=>2, :name=>"Celtic", :parent_id=>"INE"}>
    #<LanguageFamily @values={:code=>"SLA", :level=>2, :name=>"Slavic", :parent_id=>"INE"}>
    #<LanguageFamily @values={:code=>"ZLE", :level=>3, :name=>"East Slavic", :parent_id=>"SLA"}>
    #<LanguageFamily @values={:code=>"ZLS", :level=>3, :name=>"South Slavic", :parent_id=>"SLA"}>
    #<LanguageFamily @values={:code=>"eas-ZLS", :level=>4, :name=>"Eastern South Slavic", :parent_id=>"ZLS"}>
    #<LanguageFamily @values={:code=>"wes-ZLS", :level=>4, :name=>"Western South Slavic", :parent_id=>"ZLS"}>
    #<LanguageFamily @values={:code=>"ZLW", :level=>3, :name=>"West", :parent_id=>"SLA"}>
    #<LanguageFamily @values={:code=>"leh-ZLW", :level=>4, :name=>"Lechitic", :parent_id=>"ZLW"}>
    #<LanguageFamily @values={:code=>"cze-ZLW", :level=>4, :name=>"Czech-Slovak", :parent_id=>"ZLW"}>
    #<LanguageFamily @values={:code=>"WEN", :level=>4, :name=>"Sorbian", :parent_id=>"ZLW"}>
    
    ==> Loading languages
    #<Language @values={:code=>"ENG", :name=>"English", :family_id=>nil}>
    #<Language @values={:code=>"FRA", :name=>"French", :family_id=>nil}>
    #<Language @values={:code=>"GER", :name=>"German", :family_id=>nil}>
    #<Language @values={:code=>"SPA", :name=>"Spanish", :family_id=>nil}>
    #<Language @values={:code=>"POL", :name=>"Polish", :family_id=>"leh-ZLW"}>
    #<Language @values={:code=>"CES", :name=>"Czech", :family_id=>"cze-ZLW"}>
    #<Language @values={:code=>"RUS", :name=>"Russian", :family_id=>"eas-ZLS"}>
    #<Language @values={:code=>"UKR", :name=>"Ukrainian", :family_id=>"eas-ZLS"}>
    #<Language @values={:code=>"BEL", :name=>"Belarusian", :family_id=>"eas-ZLS"}>
    #<Language @values={:code=>"CSB", :name=>"Kashubian", :family_id=>"leh-ZLW"}>
    #<Language @values={:code=>"POX", :name=>"Polabian", :family_id=>"leh-ZLW"}>
    #<Language @values={:code=>"SLK", :name=>"Slovak", :family_id=>"cze-ZLW"}>
    => true