loading...

【PostgreSQL】【EntityFramework Core】Debugging SQL 1

masanori_msl profile image Masui Masanori Updated on ・5 min read

Intro

When my application causes performance problems, I should measure to find the bottle neck.
In this time, I try to measure codes what use EntityFramework Core.

Environments

  • .NET ver.5.0.100-rc.1.20452.10
  • Microsoft.EntityFrameworkCore ver.5.0.0-rc.1.20451.13
  • Npgsql.EntityFrameworkCore.PostgreSQL ver.5.0.0-rc1
  • NLog.Web.AspNetCore ver.4.9.3
  • Microsoft.AspNetCore.Mvc.NewtonsoftJson ver.5.0.0-rc.1.20451.17

Base Project

Company.cs

using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;

namespace BookStoreSample.Models
{
    public class Company
    {
        [Key]
        [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
        public int Id { get; set; }
        [Required]
        public string Name { get; set; }

        public List<Book> Books { get; set; } = new List<Book>();
    }
}

Genre.cs

using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;

namespace BookStoreSample.Models
{
    public class Genre
    {
        [Key]
        [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
        public int Id { get; set; }
        [Required]
        public string Name { get; set; }
    }
}

Book.cs

using System;
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;

namespace BookStoreSample.Models
{
    public class Book
    {
        [Key]
        [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
        public int Id { get; set; }
        [Required]
        public string Name { get; set; }
        [Column(TypeName = "timestamp with time zone")]
        public DateTime? PublishDate { get; set; }
        [ForeignKey(nameof(Company))]
        public int CompanyId { get; set; }
        [ForeignKey(nameof(Genre))]
        public int GenreId { get; set; }        
        public Company Company { get; set; }
        public Genre Genre { get; set; }
    }
}

BookStoreContext.cs

using Microsoft.EntityFrameworkCore;

namespace BookStoreSample.Models
{
    public class BookStoreContext: DbContext
    {
        public BookStoreContext(DbContextOptions<BookStoreContext> options)
            : base(options)
        {
        }
        public DbSet<Company> Companies => Set<Company>();
        public DbSet<Genre> Genres => Set<Genre>();
        public DbSet<Book> Books => Set<Book>();
    }
}

Generate sample data

To measure SQL performance, I generate sample data.

ISampleCreator.cs

using System.Threading.Tasks;

namespace BookStoreSample.Samples
{
    public interface ISampleCreator
    {
        Task CreateAsync();
    }
}

SampleCreator.cs

using System;
using System.Threading.Tasks;
using BookStoreSample.Models;

namespace BookStoreSample.Samples
{
    public class SampleCreator: ISampleCreator
    {
        private readonly BookStoreContext _context;
        public SampleCreator(BookStoreContext context)
        {
            _context = context;
        }
        public async Task CreateAsync()
        {
            using(var transaction = _context.Database.BeginTransaction())
            {
                try
                {
                    for(var i = 0; i < 1000; i++)
                    {
                        _context.Companies.Add(new Company
                        {
                            Name = $"Company: {i}",
                        });
                    }
                    for(var i = 0; i < 1000; i++)
                    {
                        _context.Genres.Add(new Genre
                        {
                            Name = $"Genre: {i}",
                        });
                    }
                    await _context.SaveChangesAsync();
                    var random = new Random();
                    for(var i = 0; i < 1000000; i++)
                    {
                        _context.Books.Add(new Book
                        {
                            Name = $"Book: {i}",
                            PublishDate = DateTime.Now,
                            CompanyId = random.Next(999) + 1,
                            GenreId = random.Next(999) + 1,
                            Price = 600,
                        });
                    }
                    await _context.SaveChangesAsync();
                    transaction.Commit();
                }
                catch(Exception ex)
                {
                    transaction.Rollback();
                    throw ex;
                }
            }
        }
    }
}

HomeController.cs

using System.Threading.Tasks;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.Logging;
using BookStoreSample.Samples;

namespace BookStoreSample.Controllers
{
    public class HomeController: Controller
    {
        private readonly ILogger<HomeController> _logger;
        private readonly ISampleCreator _sample;
        public HomeController(ILogger<HomeController> logger,
            ISampleCreator sample)
        {
            _logger = logger;
            _sample = sample;
        }
        [Route("Sample")]
        public async Task CreateSamples()
        {
            await _sample.CreateAsync();
        }
    }

}

Output generated SQL

EntityFramework Core generates SQL from my C# codes.
To measure SQL, I want to get the generated SQL.

I can output them by "EnableSensitiveDataLogging".

Startup.cs

using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Hosting;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Newtonsoft.Json;
...

namespace BookStoreSample
{
    public class Startup
    {
        private readonly IConfiguration configuration;
        public Startup(IConfiguration config)
        {
            configuration = config;
        }
        public void ConfigureServices(IServiceCollection services)
        {
            services.AddControllers()
                .AddNewtonsoftJson(options =>
                    options.SerializerSettings.ReferenceLoopHandling = ReferenceLoopHandling.Ignore);
            services.AddDbContext<BookStoreContext>(options =>
            {
                options.EnableSensitiveDataLogging();
                options.UseNpgsql(configuration["ConnectionStrings"]);
            });
...
        }
...

Because log level of outputting SQL is Information, so I must set Microsoft log level down.

appsettings.Development.json

{
  "Logging": {
    "LogLevel": {
      "Default": "Debug",
      "Microsoft": "Information",
      "Microsoft.Hosting.Lifetime": "Information"
    }
  }
}

For example, when I execute this code,

public async Task<List<SearchedCompany>> SearchCompaniesAsync()
{
    return await _context.Companies
        .ToListAsync();
}

I can get log like below.

...
2020-10-06 18:20:17.1528|20101|INFO|Microsoft.EntityFrameworkCore.Database.Command|Executed DbCommand (9ms) [Parameters=[], CommandType='Text', CommandTimeout='30']
SELECT c."Id", c."Name"
FROM "Companies" AS c |url: http://localhost/Company/Search|action: SearchCompany
...

EXPLAIN, ANALYZE

To measure the SQL performance, I can use EXPLAIN and ANALYZE.

EXPLAIN ANALYZE SELECT c."Id", c."Name" FROM "Companies" AS c

I add them before SQL queries and execute(For example on PgAdmin4), and I can get analyzed results.
The results includes execution time, what key is used for searching, etc.

This is the result on PgAdmin4.
Alt Text

Example

I try two examples and measure execution times.

Sample 1

SearchedCompany.cs

using BookStoreSample.Models;

namespace BookStoreSample.Books
{
    public class SearchedCompany
    {
        public Company? Company { get; set; }
        public Book? Book { get; set; }
    }
}

BookSearchSample.cs

using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using BookStoreSample.Models;
using Microsoft.EntityFrameworkCore;

namespace BookStoreSample.Books
{
    public class BookSearchSample: IBookSearchSample
    {
        private readonly BookStoreContext _context;
        public BookSearchSample(BookStoreContext context)
        {
            _context = context;
        }
        public async Task<List<SearchedCompany>> SearchCompaniesAsync()
        {
            return await _context.Companies
                .Include(c => c.Books)
                .Select(c => new SearchedCompany
                {
                    Company = c,
                    Book = c.Books
                        .OrderByDescending(b => b.Id).First(),
                })
                .ToListAsync();
        }
    }
}

Generated SQL

SELECT c."Id", c."Name", t0."Id", t0."CompanyId", t0."GenreId", t0."Name", t0."Price", t0."PublishDate", b0."Id", b0."CompanyId", b0."GenreId", b0."Name", b0."Price", b0."PublishDate"
FROM "Companies" AS c
LEFT JOIN (
    SELECT t."Id", t."CompanyId", t."GenreId", t."Name", t."Price", t."PublishDate"
    FROM (
        SELECT b."Id", b."CompanyId", b."GenreId", b."Name", b."Price", b."PublishDate", ROW_NUMBER() OVER(PARTITION BY b."CompanyId" ORDER BY b."Id" DESC) AS row
        FROM "Books" AS b
    ) AS t
    WHERE t.row <= 1
) AS t0 ON c."Id" = t0."CompanyId"
LEFT JOIN "Books" AS b0 ON c."Id" = b0."CompanyId"
ORDER BY c."Id", t0."Id", b0."Id"

Planning Time

  • 0.942 ms

Execution Time

  • 4941.233 ms

Sample 2

...
    public class SearchedCompany
    {
        public int CompanyId { get; set; }
        public string CompanyName { get; set; } = "";
        public Book? Book { get; set; }
    }
...

BookSearchSample.cs

...
        public async Task<List<SearchedCompany>> SearchCompaniesAsync()
        {
            return await _context.Companies
                .Include(c => c.Books)
                .Select(c => new SearchedCompany
                {
                    CompanyId = c.Id,
                    CompanyName = c.Name,
                    Book = c.Books
                        .OrderByDescending(b => b.Id).First(),
                })
                .ToListAsync();
        }
...

Generated SQL

SELECT c."Id", c."Name", t0."Id", t0."CompanyId", t0."GenreId", t0."Name", t0."Price", t0."PublishDate"
FROM "Companies" AS c
LEFT JOIN (
    SELECT t."Id", t."CompanyId", t."GenreId", t."Name", t."Price", t."PublishDate"
    FROM (
        SELECT b."Id", b."CompanyId", b."GenreId", b."Name", b."Price", b."PublishDate", ROW_NUMBER() OVER(PARTITION BY b."CompanyId" ORDER BY b."Id" DESC) AS row
        FROM "Books" AS b
    ) AS t
    WHERE t.row <= 1
) AS t0 ON c."Id" = t0."CompanyId"

Planning Time

  • 0.341 ms

Execution Time

  • 2209.166 ms

In this case, I should choose Sample 2.
These differences are small. But the Sample 1's execution time is slower two times than Sample 2's.

Discussion

pic
Editor guide
Collapse
huseyinsimsek profile image
Huseyin Simsek

Thanks for post. I have a question. Is log output and explain, analyze query output same mean?
I understood to "explain, analyze query". Although, I didn't understand to clearly why you used log output.

Collapse
masanori_msl profile image
Masui Masanori Author

Thank you for reading my post.

Is log output and explain, analyze query output same mean?
I meant "log output" was outputting Entity Framework Core generated SQL as log, and "explain, analyze query output" was result of executing "EXPLAIN ANALYZE SELECT ~".

I want to analyze SQL and avoid writing slow queries. But I haven't understand "EXPLAIN" and "ANALYZE" result yet. So I just wrote execution times.