efcore icon indicating copy to clipboard operation
efcore copied to clipboard

High memory usage on Migrate with a large number of commands

Open SteffenMangold opened this issue 5 years ago • 7 comments

When using migrationBuilder.InsertData(... to insert seed data, there is a memory leak of Microsoft.EntityFrameworkCore.Storage.RelationalCommand.

I batch insert the data in 1,000 item page (max supported).

Reproduce (insert 18,250‬ rows):

int pageSize = 1000;

var startDate = new DateTime(1990, 01, 01);
var endDate = new DateTime(2040, 12, 31);
var dateValueCount = (endDate - startDate).Days;
var dateData = new object[dateValueCount + 1, 6];

for (DateTime date = startDate; date <= endDate; date = date.AddDays(1))
{
	var dateIndex = (date - startDate).Days;
	dateData[dateIndex, 0] = date.Date;
	dateData[dateIndex, 1] = (byte)date.Day;
	dateData[dateIndex, 2] = (byte)CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday);
	dateData[dateIndex, 3] = (byte)date.Month;
	dateData[dateIndex, 4] = (byte)((date.Month + 2) / 3);
	dateData[dateIndex, 5] = (Int16)date.Year;
}


int currentDatePage = 0;
while ((currentDatePage * pageSize) < dateData.GetLength(0))
{
	var startIndex = currentDatePage * pageSize;
	var endIndex = dateData.GetLength(0) < startIndex + pageSize ? startIndex + dateData.GetLength(0) - startIndex : startIndex + pageSize;

	var dataPart = new object[endIndex - startIndex, 6];
	for (int i = startIndex; i < endIndex; i++)
	{
		dataPart[i - startIndex, 0] = dateData[i, 0];
		dataPart[i - startIndex, 1] = dateData[i, 1];
		dataPart[i - startIndex, 2] = dateData[i, 2];
		dataPart[i - startIndex, 3] = dateData[i, 3];
		dataPart[i - startIndex, 4] = dateData[i, 4];
		dataPart[i - startIndex, 5] = dateData[i, 5];

		migrationBuilder.InsertData(
			table: "Date",
			schema: "dim",
			columns: new string[]
			{
				"Date",
				"Day",
				"Week",
				"Month",
				"Quarter",
				"Year"
			},
			values: dataPart);
	}

	currentDatePage++;
}

Environment: Net Core 3.1, EF Core 3.1.1

SteffenMangold avatar Jan 25 '20 22:01 SteffenMangold

When I do it in single row insert (take waaay longer) it works without memory issue.

Example:

int pageSize = 1000;

var startDate = new DateTime(1990, 01, 01);
var endDate = new DateTime(2040, 12, 31);
var dateValueCount = (endDate - startDate).Days;
var dateData = new object[dateValueCount + 1, 6];

for (DateTime date = startDate; date <= endDate; date = date.AddDays(1))
{
	var result = migrationBuilder.InsertData(
		table: "Date",
		schema: "dim",
		columns: new string[]
		{
			"Date",
			"Day",
			"Week",
			"Month",
			"Quarter",
			"Year"
		},
		values: new object[]
		{
			date.Date,
			(byte)date.Day,
			(byte)CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday),
			(byte)date.Month,
			(byte)((date.Month + 2) / 3),
			(Int16)date.Year
		});
}

SteffenMangold avatar Jan 25 '20 22:01 SteffenMangold

image

SteffenMangold avatar Jan 25 '20 22:01 SteffenMangold

Current Workaround (reducing the leak to a minimum):

int pageSize = 1000;

var startDate = new DateTime(1990, 01, 01);
var endDate = new DateTime(2040, 12, 31);
var dateValueCount = (endDate - startDate).Days;
var dateData = new object[dateValueCount + 1, 6];

for (DateTime date = startDate; date <= endDate; date = date.AddDays(1))
{
	var dateIndex = (date - startDate).Days;
	dateData[dateIndex, 0] = date.Date;
	dateData[dateIndex, 1] = (byte)date.Day;
	dateData[dateIndex, 2] = (byte)CultureInfo.InvariantCulture.Calendar.GetWeekOfYear(date, CalendarWeekRule.FirstFourDayWeek, DayOfWeek.Monday);
	dateData[dateIndex, 3] = (byte)date.Month;
	dateData[dateIndex, 4] = (byte)((date.Month + 2) / 3);
	dateData[dateIndex, 5] = (Int16)date.Year;
}

int currentDatePage = 0;
while ((currentDatePage * pageSize) < dateData.GetLength(0))
{
	commandBuilder.Clear();
	commandBuilder.AppendLine($"INSERT INTO [dim].[Date] VALUES");

	var startIndex = currentDatePage * pageSize;
	var endIndex = dateData.GetLength(0) < startIndex + pageSize ? startIndex + dateData.GetLength(0) - startIndex : startIndex + pageSize;

	var dataPart = new object[endIndex - startIndex, 6];
	for (int i = startIndex; i < endIndex; i++)
	{
		if (i != startIndex)
			commandBuilder.AppendLine(", ");

		commandBuilder.Append($"('{dateData[i, 0]:yyyy-MM-dd}'," +
			$" {dateData[i, 1]}," +
			$" {dateData[i, 2]}," +
			$" {dateData[i, 3]}," +
			$" {dateData[i, 4]}," +
			$" {dateData[i, 5]})");
	}

	commandBuilder.Append(";");
	migrationBuilder.Sql(commandBuilder.ToString());

	currentDatePage++;
}

SteffenMangold avatar Jan 25 '20 23:01 SteffenMangold

@AndriySvyryd @bricelam Thoughts on this?

ajcvickers avatar Jan 27 '20 17:01 ajcvickers

@SteffenMangold This looks like a bug. We may have enough here to reproduce, but it would be good to get a small, runnable project or complete code listing so we don't end up coming back for more information if it turns out we can't reproduce what you are seeing.

ajcvickers avatar Jan 31 '20 20:01 ajcvickers

@SteffenMangold Do you use dotnet or PMC to apply the migration?

Also, does it happen if you use context.Database.EnsureCreated()? (Assuming that you used HasData)

AndriySvyryd avatar Jul 23 '20 00:07 AndriySvyryd

The problem is that Migrator.GenerateUpSql, Migrator.GenerateDownSql and IMigrationsSqlGenerator.Generate return IReadOnlyList<MigrationCommand> instead of IEnumerable<MigrationCommand> that can be streamed lazily

AndriySvyryd avatar Aug 27 '20 20:08 AndriySvyryd