
I want to upload 1 million data records to a MySQL database using C# while minimizing the number of database calls to improve performance and efficiency. I have a list of UpdateStatusEntity objects containing the data to be inserted and updated.

Here’s my current implementation:

public BulkUploadValidationHelperEntity UploadBulkData(List<UpdateStatusEntity> data)
        {
            BulkUploadValidationHelperEntity validationEntity = new();
            try
            {
                string dataSource = _connectionStrings.Value.DataSource;
                string database = _connectionStrings.Value.Database;
                string userName = _connectionStrings.Value.UserName;
                string password = _connectionStrings.Value.Password;

                string connectionString = $"server={dataSource};database={database};user={userName};password={password};CheckParameters=false;";
                using (var connection = new MySqlConnection(connectionString))
                {
                    connection.Open();
                    using (var transaction = connection.BeginTransaction())
                    {
                        try
                        {

                            // Update tblatr (one UPDATE per record)
                            string updateQuery = "UPDATE tblatr set " +
                                                                    "ManagementResponse = @ManagementResponse," +
                                                                    "TargetDate = @TargetDate," +
                                                                    "ManagementAction = @ManagementAction," +
                                                                    "BranchStatus = @BranchStatus," +

                                                                    "AudityUserIdTargetDate = @AudityUserIdTargetDate," +
                                                                    "AudityCreatedDateTargetDate = @AudityCreatedDateTargetDate," +
                                                                    "AudityModifiedUserIdTargetDate = @AudityModifiedUserIdTargetDate," +
                                                                    "AudityModifiedDateTargetDate = @AudityModifiedDateTargetDate," +

                                                                    "AudityUserIdStatus = @AudityUserIdStatus," +
                                                                    "AudityCreatedDateStatus = @AudityCreatedDateStatus," +
                                                                    "AudityModifiedUserIdStatus = @AudityModifiedUserIdStatus," +
                                                                    "AudityModifiedDateStatus = @AudityModifiedDateStatus," +

                                                                    "ModifiedBy = @ModifiedBy," +
                                                                    "ModifiedDate = @ModifiedDate" +
                                                                    "where TblAtrId = @TblAtrId";


                            using (var command = new MySqlCommand(updateQuery, connection, transaction))
                            {
                               

                                foreach (var item in data)
                                {
                                    // Build the parameter set for this row; values change per record.
                                    command.Parameters.Clear();
                                    command.Parameters.AddWithValue("@ManagementResponse", item.ManagementResponse);
                                    command.Parameters.AddWithValue("@TargetDate", item.TargetDate);
                                    command.Parameters.AddWithValue("@ManagementAction", item.ManagementAction);
                                    command.Parameters.AddWithValue("@BranchStatus", item.BranchStatus);

                                    command.Parameters.AddWithValue("@AudityUserIdTargetDate", item.AudityUserIdTargetDate);
                                    command.Parameters.AddWithValue("@AudityCreatedDateTargetDate", item.AudityCreatedDateTargetDate);
                                    command.Parameters.AddWithValue("@AudityModifiedUserIdTargetDate", item.AudityModifiedUserIdTargetDate);
                                    command.Parameters.AddWithValue("@AudityModifiedDateTargetDate", item.AudityModifiedDateTargetDate);

                                    command.Parameters.AddWithValue("@AudityUserIdStatus", item.AudityUserIdStatus);
                                    command.Parameters.AddWithValue("@AudityCreatedDateStatus", item.AudityCreatedDateStatus);
                                    command.Parameters.AddWithValue("@AudityModifiedUserIdStatus", item.AudityModifiedUserIdStatus);
                                    command.Parameters.AddWithValue("@AudityModifiedDateStatus", item.AudityModifiedDateStatus);

                                    command.Parameters.AddWithValue("@ModifiedBy", item.ModifiedBy);
                                    command.Parameters.AddWithValue("@ModifiedDate", item.ModifiedDate);
                                    command.Parameters.AddWithValue("@TblAtrId", item.TblAtrId);

                                    command.ExecuteNonQuery();
                                }
                            }


                            // Insert a history row into tblatrhistory (one INSERT per record)
                            string InsertQuery = "INSERT INTO tblatrhistory " +
                                                             "(`tblAtrId`,`AuditSetId`,`ContractNo`,`TransactionControlSectorId`," +
                                                             "`Conformity`,`Sample`,`AuditorQuery`,`ManagementResponse`,`TargetDate`,`ManagementAction`," +
                                                             "`BranchStatus`,`AuditorUserId`,`AuditorCreatedDate`,`AuditorModifiedUserId`,`AuditorModifiedDate`," +
                                                             "`AudityUserIdTargetDate`,`AudityCreatedDateTargetDate`,`AudityModifiedUserIdTargetDate`," +
                                                             "`AudityModifiedDateTargetDate`,`AudityUserIdStatus`,`AudityCreatedDateStatus`," +
                                                             "`AudityModifiedUserIdStatus`,`AudityModifiedDateStatus`,`Action`,`ActionUserId`," +
                                                             "`ActionUserIdDate`,`IsActive`,`Confirmity`,`ChangedData`,`ActionRoleId`)" +
                                                             "VALUES" +
                                                             "(@TblATrId,@AuditSetId,@ContractNo,@TransactionControlSectorId," +
                                                             "@Conformity, @Sample, @AuditorQuery, @ManagementResponse,@TargetDate," +
                                                             "@ManagementAction, @BranchStatus, @AuditorUserId, @AuditorCreatedDate,@AuditorModifiedUserId," +
                                                             "@AuditorModifiedDate ,@AudityUserIdTargetDate ,@AudityCreatedDateTargetDate," +
                                                             "@AudityModifiedUserIdTargetDate ,@AudityModifiedDateTargetDate ,@AudityUserIdStatus," +
                                                             "@AudityCreatedDateStatus ,@AudityModifiedUserIdStatus ,@AudityModifiedDateStatus," +
                                                             "@Action, @ActionUserId, @ActionUserIdDate ,@IsActive ,@Confirmity," +
                                                             "@ChangedData,@ActionRoleId);";

                            using (var command = new MySqlCommand(InsertQuery, connection, transaction))
                            {


                                foreach (var item in data)
                                {
                                    // Supply a value for every column named in the INSERT statement.
                                    command.Parameters.Clear();
                                    command.Parameters.AddWithValue("@TblATrId", item.TblAtrId);
                                    command.Parameters.AddWithValue("@AuditSetId", item.AuditSetId);
                                    command.Parameters.AddWithValue("@ContractNo", item.ContractNo);
                                    command.Parameters.AddWithValue("@TransactionControlSectorId", item.TransactionControlSectorId);

                                    command.Parameters.AddWithValue("@Conformity", item.Conformity);
                                    command.Parameters.AddWithValue("@Sample", item.Sample);
                                    command.Parameters.AddWithValue("@AuditorQuery", item.AuditorQuery);
                                    command.Parameters.AddWithValue("@ManagementResponse", item.ManagementResponse);
                                    command.Parameters.AddWithValue("@TargetDate", item.TargetDate);
                                    command.Parameters.AddWithValue("@ManagementAction", item.ManagementAction);

                                    command.Parameters.AddWithValue("@BranchStatus", item.BranchStatus);
                                    command.Parameters.AddWithValue("@AuditorUserId", item.AuditorUserId);
                                    command.Parameters.AddWithValue("@AuditorCreatedDate", item.AuditorCreatedDate);
                                    command.Parameters.AddWithValue("@AuditorModifiedUserId", item.AuditorModifiedUserId);
                                    command.Parameters.AddWithValue("@AuditorModifiedDate", item.AuditorModifiedDate);

                                    command.Parameters.AddWithValue("@AudityUserIdTargetDate", item.AudityUserIdTargetDate);
                                    command.Parameters.AddWithValue("@AudityCreatedDateTargetDate", item.AudityCreatedDateTargetDate);
                                    command.Parameters.AddWithValue("@AudityModifiedUserIdTargetDate", item.AudityModifiedUserIdTargetDate);
                                    command.Parameters.AddWithValue("@AudityModifiedDateTargetDate", item.AudityModifiedDateTargetDate);
                                    command.Parameters.AddWithValue("@AudityUserIdStatus", item.AudityUserIdStatus);

                                    command.Parameters.AddWithValue("@AudityCreatedDateStatus", item.AudityCreatedDateStatus);
                                    command.Parameters.AddWithValue("@AudityModifiedUserIdStatus", item.AudityModifiedUserIdStatus);
                                    command.Parameters.AddWithValue("@AudityModifiedDateStatus", item.AudityModifiedDateStatus);

                                    command.Parameters.AddWithValue("@Action", item.Action);
                                    command.Parameters.AddWithValue("@ActionUserId", item.UserId);
                                    command.Parameters.AddWithValue("@ActionUserIdDate", DateTime.Now);
                                    command.Parameters.AddWithValue("@IsActive", true);
                                    command.Parameters.AddWithValue("@Confirmity", item.Confirmity);

                                    command.Parameters.AddWithValue("@ChangedData", item.ChangedProperties);
                                    command.Parameters.AddWithValue("@ActionRoleId", item.RoleId);

                                    command.ExecuteNonQuery();
                                }
                            }
                            // Commit the transaction
                            transaction.Commit();
                        }
                        catch (Exception ex)
                        {
                            // Handle exceptions and roll back the transaction if necessary
                            transaction.Rollback();
                            ErrorLogEntity error = new ErrorLogEntity();
                            error.ControllerName = "DashBoardService";
                            error.ActionName = "UploadBulkData";
                            error.Exception = ex.Message;
                            error.StackTrace = ex.StackTrace;
                            LogHelper.LogError(error);
                            validationEntity.IsError = true;
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                ErrorLogEntity error = new ErrorLogEntity();
                error.ControllerName = "DashBoardService";
                error.ActionName = "UploadBulkData";
                error.Exception = ex.Message;
                error.StackTrace = ex.StackTrace;
                LogHelper.LogError(error);
                validationEntity.IsError = true;
            }
            
            return validationEntity;
            
        }

However, this implementation executes one statement per record per table (up to two million round trips for one million records), which causes serious performance problems with large data sets. I am looking for a more efficient way to upload this data, possibly using bulk operations to reduce the number of database calls and improve performance.

What is the best approach to efficiently upload 1 million data records to a MySQL database using C# while avoiding too many database calls? Are there any libraries or techniques that allow me to perform bulk updates and inserts with a single database call?

I would appreciate any insights or code examples demonstrating how to optimize the data upload process to achieve better performance.

Thank you!

3 Answers


  1. Running 1M+ updates over the wire is not recommended, since it:

    • is slow (each statement is a separate data transfer),
    • blocks replication, increasing replication lag on read-only replicas,
    • saturates network bandwidth to your DB server,
    • is error-prone, since a network disconnect will break the whole process.

    None of these issues has anything to do with the underlying programming language.

    To update big volumes of data on busy systems you can follow this approach:

    • produce an .sql file with UPDATE and INSERT commands (instead of running the commands, just output them and redirect the output to a local file on the client),
    • for busy systems, add a SELECT SLEEP(5); pause after roughly every 5,000 UPDATE statements (less important for INSERTs),
    • commit frequently; don't push millions of rows into one giant transaction. Commit in 5,000-UPDATE batches (even that is on the large side), otherwise on a busy server you'll hit a deadlock and the transaction will roll back,
    • compress the file and move it to the primary (read-write) DB server,
    • execute the file from the command line on the primary DB server in the background.

    Your .sql file should look like this:

    -- load-tblatr-tblatrhistory.sql
    
    -- UPDATE block of 5000
    BEGIN;
    UPDATE tblatr SET ... WHERE ...;
    UPDATE tblatr SET ... WHERE ...;
    ...
    UPDATE tblatr SET ... WHERE ...;
    COMMIT;
    SELECT SLEEP(5) AS upd_5000;
    
    -- UPDATE block of 5000
    BEGIN;
    UPDATE tblatr SET ... WHERE ...;
    ...
    UPDATE tblatr SET ... WHERE ...;
    COMMIT;
    SELECT SLEEP(5) AS upd_10000; -- increment column name with every block
    
    -- INSERT block of 5000
    INSERT INTO tblatrhistory (...)
    VALUES (...),
    (...),
    ...
    (...);
    SELECT SLEEP(1) AS ins_5000;
    
    -- INSERT block of 5000
    INSERT INTO tblatrhistory (...)
    VALUES (...),
    (...),
    ...
    (...);
    SELECT SLEEP(1) AS ins_10000; -- increment column name with every block
    

    Please pay attention to the SELECT statements: their column names reflect the progress of execution, which you'll observe in the output. And if the script breaks at some point, you'll know where to restart.
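
    Generating such a file from the List<UpdateStatusEntity> is mostly mechanical. Here is a minimal C# sketch (mine, not part of the original answer) that emits only the UPDATE blocks and a single illustrative column; WriteUpdateScript is a hypothetical helper, and a real generator must escape every string value it interpolates, since the file runs as raw SQL:

    using System.Collections.Generic;
    using System.IO;

    static void WriteUpdateScript(List<UpdateStatusEntity> data, string path)
    {
        const int BlockSize = 5000;
        using var writer = new StreamWriter(path);
        for (int i = 0; i < data.Count; i++)
        {
            if (i % BlockSize == 0)
                writer.WriteLine("BEGIN;");

            var item = data[i];
            // Double up single quotes; extend the SET list to the full column set.
            string response = (item.ManagementResponse ?? "").Replace("'", "''");
            writer.WriteLine(
                $"UPDATE tblatr SET ManagementResponse = '{response}' " +
                $"WHERE TblAtrId = {item.TblAtrId};");

            if ((i + 1) % BlockSize == 0 || i == data.Count - 1)
            {
                writer.WriteLine("COMMIT;");
                // The column alias encodes progress, as described above.
                writer.WriteLine($"SELECT SLEEP(5) AS upd_{i + 1};");
            }
        }
    }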

    The command line for executing the file should look like this:

    mysql --unbuffered \
      -h localhost \
      -u USER \
      -p \
      DB_NAME \
      <load-tblatr-tblatrhistory.sql \
      >load-tblatr-tblatrhistory.log 2>&1 &
    

    --unbuffered is key to observing your script's execution in real time; 2>&1 captures errors in the same log, and the trailing & runs the job in the background.

    With luck, this will start your load job in the background, writing both SELECT results and any errors (if they happen) to load-tblatr-tblatrhistory.log.

    Column names in SELECT results will indicate load progress.

    Watch script progress with

    tail -f load-tblatr-tblatrhistory.log
    

    This will print something like:

    upd_5000
    0
    upd_10000
    0
    upd_15000
    0
    ...
    

    1M updates with a 5-second delay after every 5,000 rows will take around 20 minutes (200 blocks × 5 s is about 17 minutes of sleeping alone, plus execution time). The 5-second pauses allow other activity to continue during script execution, both on the primary server and on read-only replicas.

    If your session disconnects from the server during script execution (your laptop goes to sleep, for example), that's no problem, since the execution happens in the background in detached mode. But I would recommend disconnecting manually after starting the script, then reconnecting with a new session to watch the logs.

  2. If you use MySqlConnector (disclaimer: I’m the lead author), you can use the MySqlBulkCopy class to perform a bulk update with one network call. (Note that you will need to add ;AllowLoadLocalInfile=true to your connection string for this to work.)

    It can’t insert your UpdateStatusEntity objects directly, so you will need to convert them to DataRow objects (or a DataTable) first in order to perform the bulk copy.

    // your objects need to be represented as DataRow types; you need to perform this conversion
    List<DataRow> updateDataRows = ConvertDataToUpdateDataRows(data);
    List<DataRow> auditDataRows = ConvertDataToAuditDataRows(data);
    
    var connectionStringBuilder = new MySqlConnectionStringBuilder
    {
        Server = dataSource,
        Database = database,
        UserID = userName,
        Password = password,
        AllowLoadLocalInfile = true,
    };
    
    // open the connection
    using var connection = new MySqlConnection(connectionStringBuilder.ConnectionString);
    connection.Open();
    
    // bulk copy the data, replacing rows in the first table
    var bulkCopy1 = new MySqlBulkCopy(connection);
    bulkCopy1.DestinationTableName = "tblatr";
    bulkCopy1.ConflictOption = MySqlBulkLoaderConflictOption.Replace;
    var result1 = bulkCopy1.WriteToServer(updateDataRows);
    
    // do a second bulk copy, inserting rows
    var bulkCopy2 = new MySqlBulkCopy(connection);
    bulkCopy2.DestinationTableName = "tblatrhistory";
    var result2 = bulkCopy2.WriteToServer(auditDataRows);
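
    The ConvertDataToUpdateDataRows / ConvertDataToAuditDataRows helpers are left to the reader. A minimal sketch of that conversion, assuming just three of the tblatr columns from the question (the column types are guesses; adjust them to the real schema), builds a DataTable, which WriteToServer also accepts directly:

    using System;
    using System.Collections.Generic;
    using System.Data;

    static DataTable ConvertDataToUpdateTable(List<UpdateStatusEntity> data)
    {
        var table = new DataTable();
        table.Columns.Add("TblAtrId", typeof(int));
        table.Columns.Add("ManagementResponse", typeof(string));
        table.Columns.Add("TargetDate", typeof(DateTime));
        // ...add the remaining tblatr columns in destination-table order...

        foreach (var item in data)
        {
            var row = table.NewRow();
            row["TblAtrId"] = item.TblAtrId;
            // Map null property values to DBNull for the database.
            row["ManagementResponse"] = (object?)item.ManagementResponse ?? DBNull.Value;
            row["TargetDate"] = (object?)item.TargetDate ?? DBNull.Value;
            table.Rows.Add(row);
        }
        return table;
    }

    If you load only a subset of columns, use bulkCopy.ColumnMappings to line the source columns up with the destination table.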
    
  3. You can move to "bulk" operations by caching some data (e.g. 10-50 records at a time) in memory arrays and building the query with the strategy described in https://www.tutorialspoint.com/how-to-bulk-update-mysql-data-with-a-single-query.

    The query itself can be built once (hardcoded) with minor effort; in your loop you only have to manage the pointer into these arrays.

    This way you will certainly speed up the job and, at the same time, commit these records in chunks, avoiding problems with, for example, a lost connection.

    It won't be a lightning-fast operation, but I believe it will reduce the time spent considerably; a sketch follows below.
