MySQL Record Size Limits

MySQL limits the number of columns in a single table at both the server layer and the storage engine (InnoDB) layer. The table below lists the byte counts that the MySQL server layer and the InnoDB engine layer use when computing the size of columns of common types:
| MySQL field type | Type | Server-layer length (bytes) | InnoDB storage length (bytes) |
| --- | --- | --- | --- |
| json | MYSQL_TYPE_JSON | 12 (4+8) | 40+1 |
| text | MYSQL_TYPE_BLOB | 10 (2+8) | 40+1 |
| varchar(1) | MYSQL_TYPE_VARCHAR | 5 (1*4+1) | 5 |
| varchar(10) | MYSQL_TYPE_VARCHAR | 41 (40+1) | 40+1 |
| varchar(255) | MYSQL_TYPE_VARCHAR | 1022 (255*4+2) | 40+1 |
| decimal(38,10) | MYSQL_TYPE_NEWDECIMAL | 18 | 18 |
| decimal(27,9) | MYSQL_TYPE_NEWDECIMAL | 12 | 12 |
| int | MYSQL_TYPE_LONG | 4 | 4 |
| boolean | MYSQL_TYPE_BOOL | 1 | 1 |
| bigint | MYSQL_TYPE_LONGLONG | 8 | 8 |
| double | MYSQL_TYPE_DOUBLE | 8 | 8 |

Note on the InnoDB column for variable-length types: a value longer than 40 bytes is stored off-page and uses at most 41 bytes in the record; a value of 40 bytes or less uses its actual byte count + 1. Byte count = character length * 4 (utf8mb4).
The author has built a calculator in a Feishu (Lark) sheet: enter the number of columns of each type and it computes the record size automatically. Feel free to use it: MySQL Record Size Calculator

Server-Layer Limits

Two hard limits:
  • At most 4096 columns
  • The combined length of all columns must not exceed 65535 bytes
The computation works as follows; the pack_length values for common field types are listed in the table above:
```
// Bytes used by the NULL bitmap
nullable_bytes = Ceil(nullable_fields / 8)
// Bytes used by the columns: the sum of each column's pack_length (table above)
data_length = Σ pack_length(column)
// Total bytes; must not exceed 65535
total_length = nullable_bytes + data_length
```
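As a quick illustration, here is a minimal sketch of that formula (plain C++, not server code; server_record_length is a made-up name for this example):

```cpp
#include <cstddef>
#include <numeric>
#include <vector>

// pack_lengths holds one entry per column, taken from the table above
// (e.g. 1022 for a utf8mb4 varchar(255)).
std::size_t server_record_length(const std::vector<std::size_t> &pack_lengths,
                                 std::size_t nullable_fields) {
  std::size_t nullable_bytes = (nullable_fields + 7) / 8;  // Ceil(n/8)
  std::size_t data_length = std::accumulate(pack_lengths.begin(),
                                            pack_lengths.end(), std::size_t{0});
  return nullable_bytes + data_length;  // must stay <= 65535
}
```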

Code Implementation

mysql_prepare_create_table

```cpp
// 65535
uint max_record_length() const {
  return std::min(HA_MAX_REC_LENGTH, max_supported_record_length());
}

// sql/sql_table.cc:8373
// Prepares the table and key structures for table creation.
bool mysql_prepare_create_table(
    THD *thd, const char *error_schema_name, const char *error_table_name,
    HA_CREATE_INFO *create_info, Alter_info *alter_info, handler *file,
    bool is_partitioned, KEY **key_info_buffer, uint *key_count,
    FOREIGN_KEY **fk_key_info_buffer, uint *fk_key_count,
    FOREIGN_KEY *existing_fks, uint existing_fks_count,
    const dd::Table *existing_fks_table, uint fk_max_generated_name_number,
    int select_field_count, bool find_parent_keys) {
  DBUG_TRACE;
  //......
  /* If fixed row records, we need one bit to check for deleted rows */
  if (!(create_info->table_options & HA_OPTION_PACK_RECORD))
    create_info->null_bits++;

  ulong data_offset = (create_info->null_bits + 7) / 8;
  size_t reclength = data_offset;
  it.rewind();
  while ((sql_field = it++)) {
    size_t length = sql_field->pack_length();
    if (sql_field->offset + data_offset + length > reclength)
      reclength = sql_field->offset + data_offset + length;
  }

  if (reclength > file->max_record_length()) {
    my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
             static_cast<long>(file->max_record_length()));
    return true;
  }
  return false;
}
```
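As a worked example using the table above: under utf8mb4 each varchar(255) column packs to 1022 bytes, so a table of 65 such columns needs 65 * 1022 = 66430 bytes even before the NULL bitmap. That exceeds the 65535-byte limit, and CREATE TABLE fails with ER_TOO_BIG_ROWSIZE.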

pack_length

```cpp
size_t Create_field::pack_length(bool dont_override) const {
  if (!dont_override && pack_length_override != 0) return pack_length_override;

  switch (sql_type) {
    case MYSQL_TYPE_SET: {
      return get_set_pack_length(interval == nullptr ? interval_list.elements
                                                     : interval->count);
    }
    case MYSQL_TYPE_ENUM: {
      return get_enum_pack_length(interval == nullptr ? interval_list.elements
                                                      : interval->count);
    }
    case MYSQL_TYPE_NEWDECIMAL: {
      assert(decimals <= DECIMAL_MAX_SCALE);
      uint precision = my_decimal_length_to_precision(
          max_display_width_in_bytes(), decimals, (flags & UNSIGNED_FLAG));
      precision = std::min(precision, static_cast<uint>(DECIMAL_MAX_PRECISION));
      return my_decimal_get_binary_size(precision, decimals);
    }
    case MYSQL_TYPE_BIT: {
      if (treat_bit_as_char) {
        return (max_display_width_in_bytes() + 7) / 8;
      } else {
        return max_display_width_in_bytes() / 8;
      }
    }
    default: {
      return calc_pack_length(sql_type, max_display_width_in_bytes());
    }
  }
}
```

max_display_width_in_bytes

```cpp
size_t Create_field::max_display_width_in_bytes() const {
  // It might seem unnecessary to have special case for the various BLOB types
  // instead of just using the "else" clause for these types as well. However,
  // that might give us rounding errors for multi-byte character sets. One
  // example is JSON which has the character set utf8mb4_bin.
  // max_display_width_in_codepoints() will return 1073741823 (truncated from
  // 1073741823.75), and multiplying that by four again will give 4294967292
  // which is the wrong result.
  assert(charset != nullptr);
  if (is_numeric_type(sql_type) || is_temporal_real_type(sql_type) ||
      sql_type == MYSQL_TYPE_YEAR || sql_type == MYSQL_TYPE_BIT) {
    // Numeric types, temporal types, YEAR or BIT are never multi-byte.
    return max_display_width_in_codepoints();
  } else if (sql_type == MYSQL_TYPE_TINY_BLOB) {
    return Field::MAX_TINY_BLOB_WIDTH;  // 255
  } else if (sql_type == MYSQL_TYPE_BLOB && !explicit_display_width()) {
    // For BLOB and TEXT, the user can give a display width (BLOB(25),
    // TEXT(25)) where the expected behavior is that the server will find the
    // smallest possible BLOB/TEXT type that will fit the given display width.
    // If the user has given an explicit display width, return that instead of
    // the max BLOB size.
    return Field::MAX_SHORT_BLOB_WIDTH;  // 65535 (64KB)
  } else if (sql_type == MYSQL_TYPE_MEDIUM_BLOB) {
    return Field::MAX_MEDIUM_BLOB_WIDTH;  // 16777215 (16MB)
  } else if (sql_type == MYSQL_TYPE_LONG_BLOB || sql_type == MYSQL_TYPE_JSON ||
             sql_type == MYSQL_TYPE_GEOMETRY) {
    return Field::MAX_LONG_BLOB_WIDTH;  // 4294967295 (4GB)
  } else {
    // If the user has given a display width to the TEXT type where the
    // display width is 2^32-1, the below computation will exceed
    // MAX_LONG_BLOB_WIDTH if the character set is multi-byte. So we must
    // ensure that we never return a value greater than MAX_LONG_BLOB_WIDTH.
    std::int64_t display_width =
        max_display_width_in_codepoints() *
        static_cast<std::int64_t>(charset->mbmaxlen);  // 4 for utf8mb4
    return static_cast<size_t>(std::min(
        display_width, static_cast<std::int64_t>(Field::MAX_LONG_BLOB_WIDTH)));
  }
}
```
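To make the rounding concern concrete, here is a tiny sketch (plain C++, not server code) of the multiply-then-clamp step for a multi-byte character set:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  const std::int64_t kMaxLongBlobWidth = 4294967295;  // Field::MAX_LONG_BLOB_WIDTH

  // varchar(255) in utf8mb4: 255 codepoints * 4 bytes = 1020 bytes.
  std::cout << std::min<std::int64_t>(255 * 4, kMaxLongBlobWidth) << "\n";

  // A TEXT column declared with display width 2^32-1 in utf8mb4 would reach
  // 4 * (2^32 - 1) without the clamp; std::min caps it at 2^32-1.
  std::cout << std::min<std::int64_t>(kMaxLongBlobWidth * 4, kMaxLongBlobWidth)
            << "\n";
}
```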

calc_pack_length

```cpp
size_t calc_pack_length(enum_field_types type, size_t length) {
  switch (type) {
    case MYSQL_TYPE_VAR_STRING:
    case MYSQL_TYPE_STRING:
    case MYSQL_TYPE_DECIMAL:
      return (length);
    case MYSQL_TYPE_VARCHAR:
      return (length + (length < 256 ? 1 : 2));
    case MYSQL_TYPE_BOOL:
    case MYSQL_TYPE_YEAR:
    case MYSQL_TYPE_TINY:
      return 1;
    case MYSQL_TYPE_SHORT:
      return 2;
    case MYSQL_TYPE_INT24:
    case MYSQL_TYPE_NEWDATE:
      return 3;
    case MYSQL_TYPE_TIME:
      return 3;
    case MYSQL_TYPE_TIME2:
      return length > MAX_TIME_WIDTH
                 ? my_time_binary_length(length - MAX_TIME_WIDTH - 1)
                 : 3;
    case MYSQL_TYPE_TIMESTAMP:
      return 4;
    case MYSQL_TYPE_TIMESTAMP2:
      return length > MAX_DATETIME_WIDTH
                 ? my_timestamp_binary_length(length - MAX_DATETIME_WIDTH - 1)
                 : 4;
    case MYSQL_TYPE_DATE:
    case MYSQL_TYPE_LONG:
      return 4;
    case MYSQL_TYPE_FLOAT:
      return sizeof(float);
    case MYSQL_TYPE_DOUBLE:
      return sizeof(double);
    case MYSQL_TYPE_DATETIME:
      return 8;
    case MYSQL_TYPE_DATETIME2:
      return length > MAX_DATETIME_WIDTH
                 ? my_datetime_binary_length(length - MAX_DATETIME_WIDTH - 1)
                 : 5;
    case MYSQL_TYPE_LONGLONG:
      return 8; /* Don't crash if no longlong */
    case MYSQL_TYPE_NULL:
      return 0;
    case MYSQL_TYPE_TINY_BLOB:
      return 1 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_BLOB:
      return 2 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_MEDIUM_BLOB:
      return 3 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_LONG_BLOB:
      return 4 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_GEOMETRY:
      return 4 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_JSON:
      return 4 + portable_sizeof_char_ptr;
    case MYSQL_TYPE_SET:
    case MYSQL_TYPE_ENUM:
    case MYSQL_TYPE_NEWDECIMAL:
      assert(false);
      return 0;  // This shouldn't happen
    case MYSQL_TYPE_BIT:
      return length / 8;
    case MYSQL_TYPE_INVALID:
    case MYSQL_TYPE_TYPED_ARRAY:
      break;
  }
  assert(false);
  return 0;
}
```
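Cross-checking the VARCHAR branch above against the table: the pack length is the byte width plus a 1- or 2-byte length prefix. A minimal re-implementation of just that branch (a mirror of the rule shown above, not the real function):

```cpp
#include <cstddef>
#include <iostream>

// Mirrors the MYSQL_TYPE_VARCHAR case: width + (width < 256 ? 1 : 2).
static std::size_t varchar_pack_length(std::size_t width_bytes) {
  return width_bytes + (width_bytes < 256 ? 1 : 2);
}

int main() {
  std::cout << varchar_pack_length(1 * 4) << "\n";    // varchar(1), utf8mb4: 5
  std::cout << varchar_pack_length(10 * 4) << "\n";   // varchar(10): 41
  std::cout << varchar_pack_length(255 * 4) << "\n";  // varchar(255): 1022
}
```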

Storage-Layer Limits / InnoDB Limits

  • At most 1017 columns per table
  • A single row must not exceed 8126 bytes
The figure below shows how a record in the default DYNAMIC row format is laid out in a page:
[figure: on-page layout of a DYNAMIC record]
The record size is computed as follows:
data_size:
  • Primary key size; if the table has no primary key, a default row ID is assigned, taking 6 bytes
  • TRX_ID: 6 bytes
  • DB_ROLL_PTR: 7 bytes
  • User columns: each column's footprint follows the lengths in the table above; only the data part counts here, without extra_size. For example, json/text/blob takes 41 bytes in total, of which 40 bytes are data
    • For both CREATE TABLE and INSERT, a column is counted as min(40, #col_len)
extra_size:
  • rec_get_n_fields_length: a table that has had columns added via INSTANT ADD COLUMN must record its field count separately, in 1 byte when the count is below 128 and 2 bytes otherwise; this is only counted when inserting data, not for CREATE TABLE or ADD COLUMN
  • NULL bitmap: Ceil(nullable/8) bytes
  • REC_N_NEW_EXTRA_BYTES: 5 bytes of record header
  • Variable-length field list:
    • A field length of 128 bytes or more takes 2 bytes
    • Externally stored fields (text/blob/json/varchar) take 2 bytes
    • For CREATE TABLE and ADD COLUMN each entry is computed as 1 byte (see the formulas below)
 
Computation for CREATE TABLE / ADD COLUMN:
total_size = extra_size + data_size
           = Ceil(nullable/8) + 5 + 1 * #varlen + (#KEY + 6 + 7 + Σ min(40, #col_len))
Computation for INSERT:
total_size = extra_size + data_size
           = [0|1|2] + Ceil(nullable/8) + 5 + 2 * #varlen + (#KEY + 6 + 7 + Σ min(40, #col_len))
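Both formulas can be folded into one small sketch. This is not InnoDB code: the struct and function names are invented, and every user column is counted as min(40, col_len) bytes of data per the rules above:

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct Col {
  std::size_t len;  // data length in bytes
  bool nullable;
  bool varlen;      // variable-length (varchar/text/blob/json)
};

// n_fields_len: 0 for tables never INSTANT-altered, otherwise 1 or 2.
std::size_t innodb_rec_size(const std::vector<Col> &cols, std::size_t key_len,
                            bool insert_path, std::size_t n_fields_len) {
  std::size_t nullable = 0, n_varlen = 0;
  std::size_t data = key_len + 6 + 7;  // #KEY + TRX_ID + DB_ROLL_PTR
  for (const Col &c : cols) {
    if (c.nullable) ++nullable;
    if (c.varlen) ++n_varlen;
    data += std::min<std::size_t>(40, c.len);
  }
  std::size_t extra = (insert_path ? n_fields_len : 0)    // rec_get_n_fields_length
                      + (nullable + 7) / 8                // NULL bitmap
                      + 5                                 // REC_N_NEW_EXTRA_BYTES
                      + (insert_path ? 2 : 1) * n_varlen; // var-len field list
  return extra + data;
}
```

The result must stay under the 8126-byte leaf-page limit discussed above.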

Code Implementation

dict_index_too_big_for_tree / CREATE TABLE & ADD COLUMN

```cpp
/** If a record of this index might not fit on a single B-tree page,
return TRUE.
@return true if the index record could become too big */
static bool dict_index_too_big_for_tree(
    const dict_table_t *table,     /*!< in: table */
    const dict_index_t *new_index, /*!< in: index */
    bool strict)                   /*!< in: TRUE=report error if records
                                   could be too big to fit in an B-tree page */
{
  ulint comp;
  ulint i;
  /* maximum possible storage size of a record */
  ulint rec_max_size;
  /* maximum allowed size of a record on a leaf page */
  ulint page_rec_max;
  /* maximum allowed size of a node pointer record */
  ulint page_ptr_max;

  /* FTS index consists of auxiliary tables, they shall be excluded from
  index row size check */
  if (new_index->type & DICT_FTS) {
    return (false);
  }

  DBUG_EXECUTE_IF("ib_force_create_table", return (FALSE););

  comp = dict_table_is_comp(table);

  const page_size_t page_size(dict_table_page_size(table));

  if (page_size.is_compressed() &&
      page_size.physical() < univ_page_size.physical()) {
    /* On a compressed page, two records must fit in the uncompressed page
    modification log. On compressed pages with size.physical() ==
    univ_page_size.physical(), this limit will never be reached. */
    ut_ad(comp);
    /* The maximum allowed record size is the size of an empty page, minus a
    byte for recoding the heap number in the page modification log. The
    maximum allowed node pointer size is half that. */
    page_rec_max =
        page_zip_empty_size(new_index->n_fields, page_size.physical());
    if (page_rec_max) {
      page_rec_max--;
    }
    page_ptr_max = page_rec_max / 2;
    /* On a compressed page, there is a two-byte entry in the dense page
    directory for every record. But there is no record header. */
    rec_max_size = 2;
  } else {
    /* The maximum allowed record size is half a B-tree page (16k for 64k
    page size). No additional sparse page directory entry will be generated
    for the first few user records. */
    page_rec_max = srv_page_size == UNIV_PAGE_SIZE_MAX
                       ? REC_MAX_DATA_SIZE - 1
                       : page_get_free_space_of_empty(comp) / 2;
    page_ptr_max = page_rec_max;
    /* Each record has a header. */
    rec_max_size = comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES;  // 5
  }

  if (comp) {
    /* Include the "null" flags in the maximum possible record size. */
    rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
  } else {
    /* For each column, include a 2-byte offset and a "null" flag. The 1-byte
    format is only used in short records that do not contain externally
    stored columns. Such records could never exceed the page limit, even
    when using the 2-byte format. */
    rec_max_size += 2 * new_index->n_fields;
  }

  /* Compute the maximum possible record size. */
  for (i = 0; i < new_index->n_fields; i++) {
    const dict_field_t *field = new_index->get_field(i);
    const dict_col_t *col = field->col;
    ulint field_max_size;
    ulint field_ext_max_size;

    /* In dtuple_convert_big_rec(), variable-length columns that are longer
    than BTR_EXTERN_LOCAL_STORED_MAX_SIZE may be chosen for external storage.
    Fixed-length columns, and all columns of secondary index records are
    always stored inline. */

    /* Determine the maximum length of the index field. The
    field_ext_max_size should be computed as the worst case in
    rec_get_converted_size_comp() for REC_STATUS_ORDINARY records. */

    field_max_size = col->get_fixed_size(comp);
    if (field_max_size && field->fixed_len != 0) {
      /* dict_index_add_col() should guarantee this */
      ut_ad(!field->prefix_len || field->fixed_len == field->prefix_len);
      /* Fixed lengths are not encoded in ROW_FORMAT=COMPACT. */
      field_ext_max_size = 0;
      goto add_field_size;
    }

    // For BLOB types this is 18446744073709551614
    field_max_size = col->get_max_size();
    field_ext_max_size = field_max_size < 256 ? 1 : 2;

    if (field->prefix_len) {
      if (field->prefix_len < field_max_size) {
        field_max_size = field->prefix_len;
      }
    } else if (field_max_size > BTR_EXTERN_LOCAL_STORED_MAX_SIZE &&
               new_index->is_clustered()) {
      /* In the worst case, we have a locally stored column of
      BTR_EXTERN_LOCAL_STORED_MAX_SIZE bytes. The length can be stored in
      one byte. If the column were stored externally, the lengths in the
      clustered index page would be BTR_EXTERN_FIELD_REF_SIZE and 2. */
      field_max_size = BTR_EXTERN_LOCAL_STORED_MAX_SIZE;
      field_ext_max_size = 1;
    }

    if (comp) {
      /* Add the extra size for ROW_FORMAT=COMPACT. For ROW_FORMAT=REDUNDANT,
      these bytes were added to rec_max_size before this loop. */
      rec_max_size += field_ext_max_size;
    }
  add_field_size:
    rec_max_size += field_max_size;

    /* Check the size limit on leaf pages. */
    if (rec_max_size >= page_rec_max) {
      ib::error_or_warn(strict)
          << "Cannot add field " << field->name << " in table " << table->name
          << " because after adding it, the row size is " << rec_max_size
          << " which is greater than maximum allowed"
             " size ("
          << page_rec_max << ") for a record on index leaf page.";
      return (true);
    }

    /* Check the size limit on non-leaf pages. Records stored in non-leaf
    B-tree pages consist of the unique columns of the record (the key columns
    of the B-tree) and a node pointer field. When we have processed the
    unique columns, rec_max_size equals the size of the node pointer record
    minus the node pointer column. */
    if (i + 1 == dict_index_get_n_unique_in_tree(new_index) &&
        rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
      return (true);
    }
  }

  return (false);
}
```
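Where does the 8126-byte limit come from? For the default 16 KB page, the maximum record is half the free space of an empty COMPACT page, as computed by page_get_free_space_of_empty() above. A back-of-the-envelope sketch; the 132-byte overhead breakdown is my reading of the page header/trailer constants and should be treated as an assumption:

```cpp
#include <iostream>

int main() {
  const int page_size = 16384;  // default UNIV_PAGE_SIZE
  // Assumed breakdown: 120 bytes up to the end of the supremum record,
  // 8 bytes of page trailer, 4 bytes for two page directory slots.
  const int empty_page_overhead = 120 + 8 + 4;
  const int free_space = page_size - empty_page_overhead;  // 16252
  std::cout << free_space / 2 << "\n";                      // 8126
}
```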

rec_get_converted_size_comp_prefix_low / INSERT

```cpp
/** Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
[[nodiscard]] static inline ulint rec_get_converted_size_comp_prefix_low(
    const dict_index_t *index, /*!< in: record descriptor;
                               dict_table_is_comp() is assumed to hold,
                               even if it does not */
    const dfield_t *fields,    /*!< in: array of data fields */
    ulint n_fields,            /*!< in: number of data fields */
    const dtuple_t *v_entry,   /*!< in: dtuple contains virtual column data */
    ulint *extra,              /*!< out: extra size */
    ulint *status,             /*!< in: status bits of the record,
                               can be nullptr if unnecessary */
    bool temp)                 /*!< in: whether this is a temporary file
                               record */
{
  ulint extra_size = 0;
  ulint data_size;
  ulint i;
  ulint n_null = 0;
  ulint n_v_fields;
  ut_ad(n_fields <= dict_index_get_n_fields(index));
  ut_ad(!temp || extra);

  /* At the time being, only temp file record could possibly store virtual
  columns */
  ut_ad(!v_entry || (index->is_clustered() && temp));
  n_v_fields = v_entry ? dtuple_get_n_v_fields(v_entry) : 0;

  if (n_fields > 0) {
    n_null = index->has_instant_cols()
                 ? index->get_n_nullable_before(static_cast<uint32_t>(n_fields))
                 : index->n_nullable;
  }

  if (index->is_tuple_instant_format(n_fields) && status != nullptr) {
    switch (UNIV_EXPECT(*status, REC_STATUS_ORDINARY)) {
      case REC_STATUS_ORDINARY:
        ut_ad(!temp && n_fields > 0);
        // 1 byte when the field count is below 128, otherwise 2; only
        // needed for tables with INSTANT-added columns
        extra_size += rec_get_n_fields_length(n_fields);
        break;
      case REC_STATUS_NODE_PTR:
        ut_ad(!temp && n_fields > 0);
        n_null = index->n_instant_nullable;
        break;
      case REC_STATUS_INFIMUM:
      case REC_STATUS_SUPREMUM:
        break;
    }
  }

  // REC_N_NEW_EXTRA_BYTES is 5 bytes; UT_BITS_IN_BYTES divides by 8 and
  // rounds up
  extra_size += temp ? UT_BITS_IN_BYTES(n_null)
                     : REC_N_NEW_EXTRA_BYTES + UT_BITS_IN_BYTES(n_null);
  data_size = 0;

  if (temp && dict_table_is_comp(index->table)) {
    /* No need to do adjust fixed_len=0. We only need to adjust it for
    ROW_FORMAT=REDUNDANT. */
    temp = false;
  }

  /* read the lengths of fields 0..n */
  for (i = 0; i < n_fields; i++) {
    const dict_field_t *field;
    ulint len;
    ulint fixed_len;
    const dict_col_t *col;

    field = index->get_field(i);
    len = dfield_get_len(&fields[i]);
    col = field->col;

#ifdef UNIV_DEBUG
    dtype_t *type;
    type = dfield_get_type(&fields[i]);
    if (dict_index_is_spatial(index)) {
      if (DATA_GEOMETRY_MTYPE(col->mtype) && i == 0) {
        ut_ad(type->prtype & DATA_GIS_MBR);
      } else {
        ut_ad(type->mtype == DATA_SYS_CHILD || col->assert_equal(type));
      }
    } else {
      ut_ad(col->assert_equal(type));
    }
#endif

    /* All NULLable fields must be included in the n_null count. */
    ut_ad((col->prtype & DATA_NOT_NULL) || n_null--);

    if (dfield_is_null(&fields[i])) {
      /* No length is stored for NULL fields. */
      ut_ad(!(col->prtype & DATA_NOT_NULL));
      continue;
    }

    ut_ad(len <= col->len || DATA_LARGE_MTYPE(col->mtype) ||
          (DATA_POINT_MTYPE(col->mtype) && len == DATA_MBR_LEN) ||
          ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY ||
            col->mtype == DATA_VARMYSQL) &&
           (col->len == 0 ||
            len <= col->len + prtype_get_compression_extra(col->prtype))));

    fixed_len = field->fixed_len;
    if (temp && fixed_len && !col->get_fixed_size(temp)) {
      fixed_len = 0;
    }

    /* If the maximum length of a variable-length field is up to 255 bytes,
    the actual length is always stored in one byte. If the maximum length is
    more than 255 bytes, the actual length is stored in one byte for 0..127.
    The length will be encoded in two bytes when it is 128 or more, or when
    the field is stored externally. */
    if (fixed_len) {
#ifdef UNIV_DEBUG
      ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
      ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);

      ut_ad(len <= fixed_len);

      if (dict_index_is_spatial(index)) {
        ut_ad(type->mtype == DATA_SYS_CHILD || !mbmaxlen ||
              len >= mbminlen * (fixed_len / mbmaxlen));
      } else {
        ut_ad(type->mtype != DATA_SYS_CHILD);
        ut_ad(!mbmaxlen || len >= mbminlen * (fixed_len / mbmaxlen));
      }

      /* dict_index_add_col() should guarantee this */
      ut_ad(!field->prefix_len || fixed_len == field->prefix_len);
#endif /* UNIV_DEBUG */
    } else if (dfield_is_ext(&fields[i])) {
      ut_ad(DATA_BIG_COL(col));
      extra_size += 2;
    } else if (len < 128 || !DATA_BIG_COL(col)) {
      extra_size++;
    } else {
      /* For variable-length columns, we look up the maximum length from the
      column itself. If this is a prefix index column shorter than 256 bytes,
      this will waste one byte. */
      extra_size += 2;
    }
    data_size += len;
  }

  if (extra) {
    *extra = extra_size;
  }
  //....
  return (extra_size + data_size);
}
```
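The 1-vs-2-byte rule from the long comment above, restated as a standalone helper (hypothetical names; the real code also consults DATA_BIG_COL and dfield_is_ext as shown):

```cpp
#include <cstddef>

// Bytes needed in the variable-length field list for one non-NULL field.
std::size_t len_header_bytes(std::size_t len, std::size_t col_max_len,
                             bool stored_externally) {
  if (stored_externally) return 2;   // off-page (BLOB/TEXT/JSON) reference
  if (col_max_len <= 255) return 1;  // short columns always use one byte
  return len < 128 ? 1 : 2;          // long columns: one byte only for 0..127
}
```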