Search code examples
mysqlquery-optimizationinnodbcost-based-optimizer

How does MySQL calculated a query cost?


I have this query:

 explain format=json
 select count(*)
 from info2 -- force index(idx_02)
 where order_code = '1027' and issue = '20220720'
 and state = 0 ;

Actually choose idx_02 is better than idx_01 ,but MySQL choose id_01.

Look at the two index cost ,idx_01 (129979.27) less than idx_02 (287195.76), but I don't understand how does MySQL calculated the idx_01 cost ?

How does MySQL to calculate the read_cost and eval_cost cost ?

Attached table schema and explain result .

Table schema:

CREATE TABLE `info2` 
(
  `id` bigint unsigned NOT NULL AUTO_INCREMENT,
  `shop_code` varchar(20) NOT NULL DEFAULT '',
  `user_id` int unsigned NOT NULL DEFAULT '0' ,
  `user_name` varchar(30) NOT NULL DEFAULT '' ,
  `record_code` char(20) NOT NULL DEFAULT '' ,
  `order_code` varchar(10) NOT NULL DEFAULT '',
  `issue` char(12) NOT NULL DEFAULT '' ,
  `order_number` varchar(700) NOT NULL DEFAULT '',
  `order_amount` decimal(18,4) NOT NULL ,
  `award` varchar(20) NOT NULL DEFAULT '' ,
  `order_count` int unsigned NOT NULL ,
  `state` tinyint unsigned NOT NULL DEFAULT '0',
  `order_unit` decimal(3,2) NOT NULL ,
  `order_id` bigint unsigned NOT NULL ,
  `source_id` tinyint unsigned NOT NULL ,
  `is_test` bit(1) NOT NULL DEFAULT b'0' ,
  `created_date` date NOT NULL DEFAULT '1970-01-01' ,
  `ip` varchar(45) NOT NULL DEFAULT '' ,
  `gmt_created` timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP(3),
  `gmt_modified` timestamp(3) NOT NULL DEFAULT CURRENT_TIMESTAMP(3) ON UPDATE CURRENT_TIMESTAMP(3),
  PRIMARY KEY (`id`),
  KEY `idx_01` (`issue`),
  KEY `idx_02` (`order_code`,`issue`,`state`,`order_number`),
  KEY `idx_03` (`user_id`) ,
  KEY `idx_04` (`order_id`) ,
  KEY `idx_05` (`created_date`,`is_test`,`order_code`,`state`) 
) ENGINE=InnoDB AUTO_INCREMENT=982131317 DEFAULT CHARSET=utf8 ;

Query explain not force index

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "129979.27"
    },
    "table": {
      "table_name": "info2",
      "access_type": "ref",
      "possible_keys": [
        "idx_01",
        "idx_02"
      ],
      "key": "idx_01",
      "used_key_parts": [
        "issue"
      ],
      "key_length": "36",
      "ref": [
        "const"
      ],
      "rows_examined_per_scan": 1028525,
      "rows_produced_per_join": 10285,
      "filtered": "1.00",
      "cost_info": {
        "read_cost": "27126.77",
        "eval_cost": "1028.53",
        "prefix_cost": "129979.27",
        "data_read_per_join": "32M"
      },
      "used_columns": [
        "order_code",
        "issue",
        "state"
      ],
      "attached_condition": "((`jerry`.`info2`.`state` = 0) and (`jerry`.`info2`.`order_code` = '1027'))"
    }
  }
}

Query explain with force index

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "287195.76"
    },
    "table": {
      "table_name": "info2",
      "access_type": "ref",
      "possible_keys": [
        "idx_02"
      ],
      "key": "idx_02",
      "used_key_parts": [
        "order_code",
        "issue",
        "state"
      ],
      "key_length": "69",
      "ref": [
        "const",
        "const",
        "const"
      ],
      "rows_examined_per_scan": 820642,
      "rows_produced_per_join": 820642,
      "filtered": "100.00",
      "using_index": true,
      "cost_info": {
        "read_cost": "205131.56",
        "eval_cost": "82064.20",
        "prefix_cost": "287195.76",
        "data_read_per_join": "2G"
      },
      "used_columns": [
        "order_code",
        "issue",
        "state"
      ]
    }
  }
}

Solution

  • Please file a bug at bugs.mysql.com. idx_02 is a "covering" index, and should be preferred over idx_01.

    A workaround is to replace

    INDEX(issue)
    

    with a covering index that can continue to be used when you only need issue:

    INDEX(issue, order_code, state)
    

    Note that I left out the bulky 4th column.