Search code examples
entity-framework-coreasp.net-core-2.0asp.net-core-webapiasp.net-core-2.1

ASP.NET Core 2.1 Entity Framework Core Related Data Duplication


I'm trying to build an ASP.NET Core API which returns an object containing some of related, joined, data.

But when I call an Article model using Include(), it's mapped automatically by EF Core. Which is intended, but.. unexpected mapping is occurring.

EF Core produce fully mapped object for all objects so that too much data is generated.

For example, User model has Article property and when I call Article model, User property also contains Article object information. As a result, it returns a lot of duplicated data.

At the first time, I think it's coming from

https://learn.microsoft.com/en-us/ef/core/querying/related-data

  1. Eager loading
  2. Explicit loading
  3. Lazy loading

But it wasn't.

I don't know how to configure EF Core not to map all the objects inside..

*Database Schema & Model

enter image description here

public class Topic
{
    [Key]
    [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
    [HiddenInput(DisplayValue = false)]
    public int TopicId { get; set; }

    [Required]
    [StringLength(20, ErrorMessage = "Title cannot be longer than 20 characters.")]
    public string Title { get; set; }

    [StringLength(200, ErrorMessage = "Description cannot be longer than 200 characters.")]
    public string Description { get; set; }

    public byte[] Picture { get; set; }

    public string PictureMimeType { get; set; }

    [DataType(DataType.Date)]
    [DisplayFormat(DataFormatString = "{0:MM-dd-yyyy}", ApplyFormatInEditMode = true)]
    public DateTime PostDate { get; set; }

    [DataType(DataType.Date)]
    [DisplayFormat(DataFormatString = "{0:MM-dd-yyyy}", ApplyFormatInEditMode = true)]
    public DateTime ModifyDate { get; set; }

    [Display(Name = "Show")]
    public bool ShowFlag { get; set; }

    public int UserId { get; set; }

    public virtual User User { get; set; }

    public virtual ICollection<Article> Articles { get; set; }
}

public class Article
{
    [Key]
    [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
    [HiddenInput(DisplayValue = false)]
    public int ArticleId { get; set; }

    [Required]
    [StringLength(100, ErrorMessage = "Title cannot be longer than 100 characters.")]
    public string Title { get; set; }

    public string Content { get; set; }

    [StringLength(10, ErrorMessage = "Category cannot be longer than 10 characters.")]
    public string Category { get; set; } = "Free";

    public byte[] Picture { get; set; }

    public string PictureMimeType { get; set; }

    [DataType(DataType.Date)]
    [DisplayFormat(DataFormatString = "{0:MM-dd-yyyy}", ApplyFormatInEditMode = true)]
    public DateTime PostDate { get; set; }

    [DataType(DataType.Date)]
    [DisplayFormat(DataFormatString = "{0:MM-dd-yyyy}", ApplyFormatInEditMode = true)]
    public DateTime ModifyDate { get; set; }

    public int ReadCount { get; set; }

    [Required]
    [Display(Name = "Show")]
    public bool ShowFlag { get; set; }

    public int TopicId { get; set; }

    public virtual Topic Topic { get; set; }

    public int UserId { get; set; }

    public virtual User User { get; set; }
}

public class User
{
    [Key]
    [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
    [HiddenInput(DisplayValue = false)]
    public int UserId { get; set; }

    [StringLength(20, ErrorMessage = "Name cannot be longer than 20 characters.")]
    public string Name { get; set; }

    [StringLength(255, ErrorMessage = "The {0} must be at least {2} characters long.", MinimumLength = 6)]
    [DataType(DataType.Password)]
    public string Password { get; set; }

    [DataType(DataType.EmailAddress)]
    [EmailAddress]
    public string Email { get; set; }

    [StringLength(20, ErrorMessage = "Title cannot be longer than 20 characters.")]
    public string Title { get; set; }

    [StringLength(20, ErrorMessage = "Phone number cannot be longer than 20 characters.")]
    [DataType(DataType.PhoneNumber)]
    public string Phone { get; set; }

    [StringLength(100, ErrorMessage = "Address cannot be longer than 100 characters.")]
    public string Address { get; set; }

    [StringLength(100, ErrorMessage = "Introduction cannot be longer than 100 characters.")]
    public string Introduction { get; set; }

    [DataType(DataType.Date)]
    public DateTime? Birthdate { get; set; }

    public byte[] Picture { get; set; }

    public string PictureMimeType { get; set; }

    public virtual ICollection<Topic> Topics { get; set; }

    public virtual ICollection<Article> Articles { get; set; }
}

*Controller Code

   var article = await this._context.Articles
     .Include(a => a.User)
     .Include(a => a.Topic)
     .SingleOrDefaultAsync(a => a.ArticleId == id);

*Duplicated Data

{
"articleId": 5,
"title": "Does TaskAll() really wait for all in any case?",
"content": null,
"category": "Threading",
"picture": null,
"pictureMimeType": null,
"postDate": "2018-06-18T07:00:00",
"modifyDate": "2018-06-20T07:47:42.1854485",
"readCount": 101,
"showFlag": true,
"topicId": 1,
"topic": {
    "topicId": 1,
    "title": "C#",
    "description": "About C#",
    "picture": null,
    "pictureMimeType": null,
    "postDate": "2018-05-28T07:00:00",
    "modifyDate": "2018-05-28T23:36:04.701311",
    "showFlag": true,
    "userId": 1,
    "user": {
        "userId": 1,
        "name": "",
        "password": null,
        "email": "",
        "title": "Junior Programmer!!",
        "phone": "",
        "address": "",
        "introduction": "Hey, I'm Jason, most motivated person to be a fullstack programmer! ",
        "birthdate": null,
        "picture": null,
        "pictureMimeType": null,
        "permissionId": 0,
        "permission": null,
        "topics": [],
        "articles": [
            {
                "articleId": 5,
                "title": "Does TaskAll() really wait for all in any case?",
                "content": null,
                "category": "Threading",
                "picture": null,
                "pictureMimeType": null,
                "postDate": "2018-06-18T07:00:00",
                "modifyDate": "2018-06-20T07:47:42.1854485",
                "readCount": 101,
                "showFlag": true,
                "topicId": 1,
                "userId": 1
            }
        ]
    },
    "articles": [
        {
            "articleId": 5,
            "title": "Does TaskAll() really wait for all in any case?",
            "content": null,
            "category": "Threading",
            "picture": null,
            "pictureMimeType": null,
            "postDate": "2018-06-18T07:00:00",
            "modifyDate": "2018-06-20T07:47:42.1854485",
            "readCount": 101,
            "showFlag": true,
            "topicId": 1,
            "userId": 1,
            "user": {
                "userId": 1,
                "name": "",
                "password": null,
                "email": "",
                "title": "Junior Programmer!!",
                "phone": "",
                "address": "",
                "introduction": "Hey, I'm Jason, most motivated person to be a fullstack programmer! ",
                "birthdate": null,
                "picture": null,
                "pictureMimeType": null,
                "topics": [],
                "articles": []
            }
        }
    ]
},
"userId": 1,
"user": {
    "userId": 1,
    "name": "",
    "password": null,
    "email": "",
    "title": "Junior Programmer!!",
    "phone": "",
    "address": "",
    "introduction": "Hey, I'm Jason, most motivated person to be a fullstack programmer! ",
    "birthdate": null,
    "picture": null,
    "pictureMimeType": null,
    "topics": [
        {
            "topicId": 1,
            "title": "C#",
            "description": "About C#",
            "picture": null,
            "pictureMimeType": null,
            "postDate": "2018-05-28T07:00:00",
            "modifyDate": "2018-05-28T23:36:04.701311",
            "showFlag": true,
            "userId": 1,
            "articles": [
                {
                    "articleId": 5,
                    "title": "Does TaskAll() really wait for all in any case?",
                    "content": null,
                    "category": "Threading",
                    "picture": null,
                    "pictureMimeType": null,
                    "postDate": "2018-06-18T07:00:00",
                    "modifyDate": "2018-06-20T07:47:42.1854485",
                    "readCount": 101,
                    "showFlag": true,
                    "topicId": 1,
                    "userId": 1
                }
            ]
        }
    ],
    "articles": [
        {
            "articleId": 5,
            "title": "Does TaskAll() really wait for all in any case?",
            "content": null,
            "category": "Threading",
            "picture": null,
            "pictureMimeType": null,
            "postDate": "2018-06-18T07:00:00",
            "modifyDate": "2018-06-20T07:47:42.1854485",
            "readCount": 101,
            "showFlag": true,
            "topicId": 1,
            "topic": {
                "topicId": 1,
                "title": "C#",
                "description": "About C#",
                "picture": null,
                "pictureMimeType": null,
                "postDate": "2018-05-28T07:00:00",
                "modifyDate": "2018-05-28T23:36:04.701311",
                "showFlag": true,
                "userId": 1,
                "articles": []
            },
            "userId": 1
        }
    ]
}

}


Solution

  • Actually , there's no "duplicated" data .

    When the following query executed :

    var article = await this._context.Articles
         .Include(a => a.User)
         .Include(a => a.Topic)
         .SingleOrDefaultAsync(a => a.ArticleId == id);
    

    the sql behind the scene will be :

    SELECT TOP(1) 
        [a].[ArticleId], [a].[Category], [a].[Content], [a].[ModifyDate], [a].[Picture], [a].[PictureMimeType], [a].[PostDate],[a].[ReadCount], [a].[ShowFlag], [a].[Title], 
        [a].[TopicId], [a].[UserId], 
    
        [a.Topic].[TopicId], [a.Topic].[Description], [a.Topic].[ModifyDate], [a.Topic].[Picture], [a.Topic].[PictureMimeType], [a.Topic].[PostDate], [a.Topic].[ShowFlag], [a.Topic].[Title],[a.Topic].[UserId], 
    
        [a.User].[UserId], [a.User].[Address], [a.User].[Birthdate], [a.User].[Email], [a.User].[Introduction], [a.User].[Name], [a.User].[Password], [a.User].[Phone], [a.User].[Picture], [a.User].[PictureMimeType], [a.User].[Title]
    FROM [Article] AS [a]
    INNER JOIN [Topics] AS [a.Topic] ON [a].[TopicId] = [a.Topic].[TopicId]
    INNER JOIN [Users] AS [a.User] ON [a].[UserId] = [a.User].[UserId]
    WHERE [a].[ArticleId] = @__id_0
    

    The SQL will not load any extra records . Let's say a User has two articles :

    Article1 : { ArticleId=1, UserId=1}
    Article3 : { ArticleId=3, UserId=1}
    

    The query above will load only one article .

    When the records loaded into memory from server , EFCore knows there's a relationship of 1-to-many between User and Articles and just begins to set the user object as the navigation property of article . The same happens between User and Topic Entities. All these are finished in memory .

    As you see , there's no extra records retrieved from database and no duplicated data in memory .

    What makes you confused is the data that serialized to client . However ,it does make sense . As a plain string has no idea of reference / pointer , it's hard to represent the relationship between objects with json .

    By the way , there's no need to decorate you Email property of User with two almost same attributes :

    [DataType(DataType.EmailAddress)]
    [EmailAddress]
    public string Email { get; set; }
    

    It will cause an error of "two same custom attributes" when migrating database.