I want to upload 1 million data records to a MySQL database using C# while minimizing the number of database calls to improve performance and efficiency. I have a list of UpdateStatusEntity
objects containing the data to be inserted and updated.
Here’s my current implementation:
public BulkUploadValidationHelperEntity UploadBulkData(List<UpdateStatusEntity> data)
{
    BulkUploadValidationHelperEntity validationEntity = new();
    try
    {
        string dataSource = _connectionStrings.Value.DataSource;
        string database = _connectionStrings.Value.Database;
        string userName = _connectionStrings.Value.UserName;
        string password = _connectionStrings.Value.Password;
        string connectionString = $"server={dataSource};database={database};user={userName};password={password};CheckParameters=false;";
        using (var connection = new MySqlConnection(connectionString))
        {
            connection.Open();
            using (var transaction = connection.BeginTransaction())
            {
                try
                {
                    // Update tblatr, one statement per row
                    string updateQuery = "UPDATE tblatr SET " +
                        "ManagementResponse = @ManagementResponse," +
                        "TargetDate = @TargetDate," +
                        "ManagementAction = @ManagementAction," +
                        "BranchStatus = @BranchStatus," +
                        "AudityUserIdTargetDate = @AudityUserIdTargetDate," +
                        "AudityCreatedDateTargetDate = @AudityCreatedDateTargetDate," +
                        "AudityModifiedUserIdTargetDate = @AudityModifiedUserIdTargetDate," +
                        "AudityModifiedDateTargetDate = @AudityModifiedDateTargetDate," +
                        "AudityUserIdStatus = @AudityUserIdStatus," +
                        "AudityCreatedDateStatus = @AudityCreatedDateStatus," +
                        "AudityModifiedUserIdStatus = @AudityModifiedUserIdStatus," +
                        "AudityModifiedDateStatus = @AudityModifiedDateStatus," +
                        "ModifiedBy = @ModifiedBy," +
                        "ModifiedDate = @ModifiedDate " + // trailing space needed before WHERE
                        "WHERE TblAtrId = @TblAtrId";
                    using (var command = new MySqlCommand(updateQuery, connection, transaction))
                    {
                        foreach (var item in data)
                        {
                            // Parameters have to be created before they can be assigned;
                            // AddWithValue creates each one with its value in a single step.
                            command.Parameters.Clear();
                            command.Parameters.AddWithValue("@ManagementResponse", item.ManagementResponse);
                            command.Parameters.AddWithValue("@TargetDate", item.TargetDate);
                            command.Parameters.AddWithValue("@ManagementAction", item.ManagementAction);
                            command.Parameters.AddWithValue("@BranchStatus", item.BranchStatus);
                            command.Parameters.AddWithValue("@AudityUserIdTargetDate", item.AudityUserIdTargetDate);
                            command.Parameters.AddWithValue("@AudityCreatedDateTargetDate", item.AudityCreatedDateTargetDate);
                            command.Parameters.AddWithValue("@AudityModifiedUserIdTargetDate", item.AudityModifiedUserIdTargetDate);
                            command.Parameters.AddWithValue("@AudityModifiedDateTargetDate", item.AudityModifiedDateTargetDate);
                            command.Parameters.AddWithValue("@AudityUserIdStatus", item.AudityUserIdStatus);
                            command.Parameters.AddWithValue("@AudityCreatedDateStatus", item.AudityCreatedDateStatus);
                            command.Parameters.AddWithValue("@AudityModifiedUserIdStatus", item.AudityModifiedUserIdStatus);
                            command.Parameters.AddWithValue("@AudityModifiedDateStatus", item.AudityModifiedDateStatus);
                            command.Parameters.AddWithValue("@ModifiedBy", item.ModifiedBy);
                            command.Parameters.AddWithValue("@ModifiedDate", item.ModifiedDate);
                            command.Parameters.AddWithValue("@TblAtrId", item.TblAtrId);
                            command.ExecuteNonQuery(); // one round trip per row
                        }
                    }
                    // Insert into tblatrhistory, again one statement per row
                    string insertQuery = "INSERT INTO tblatrhistory " +
                        "(`tblAtrId`,`AuditSetId`,`ContractNo`,`TransactionControlSectorId`," +
                        "`Conformity`,`Sample`,`AuditorQuery`,`ManagementResponse`,`TargetDate`,`ManagementAction`," +
                        "`BranchStatus`,`AuditorUserId`,`AuditorCreatedDate`,`AuditorModifiedUserId`,`AuditorModifiedDate`," +
                        "`AudityUserIdTargetDate`,`AudityCreatedDateTargetDate`,`AudityModifiedUserIdTargetDate`," +
                        "`AudityModifiedDateTargetDate`,`AudityUserIdStatus`,`AudityCreatedDateStatus`," +
                        "`AudityModifiedUserIdStatus`,`AudityModifiedDateStatus`,`Action`,`ActionUserId`," +
                        "`ActionUserIdDate`,`IsActive`,`Confirmity`,`ChangedData`,`ActionRoleId`) " +
                        "VALUES " +
                        "(@TblAtrId,@AuditSetId,@ContractNo,@TransactionControlSectorId," +
                        "@Conformity,@Sample,@AuditorQuery,@ManagementResponse,@TargetDate," +
                        "@ManagementAction,@BranchStatus,@AuditorUserId,@AuditorCreatedDate,@AuditorModifiedUserId," +
                        "@AuditorModifiedDate,@AudityUserIdTargetDate,@AudityCreatedDateTargetDate," +
                        "@AudityModifiedUserIdTargetDate,@AudityModifiedDateTargetDate,@AudityUserIdStatus," +
                        "@AudityCreatedDateStatus,@AudityModifiedUserIdStatus,@AudityModifiedDateStatus," +
                        "@Action,@ActionUserId,@ActionUserIdDate,@IsActive,@Confirmity," +
                        "@ChangedData,@ActionRoleId);";
                    using (var command = new MySqlCommand(insertQuery, connection, transaction))
                    {
                        foreach (var item in data)
                        {
                            command.Parameters.Clear();
                            command.Parameters.AddWithValue("@TblAtrId", item.TblAtrId);
                            command.Parameters.AddWithValue("@AuditSetId", item.AuditSetId);
                            command.Parameters.AddWithValue("@ContractNo", item.ContractNo);
                            command.Parameters.AddWithValue("@TransactionControlSectorId", item.TransactionControlSectorId);
                            command.Parameters.AddWithValue("@Conformity", item.Conformity);
                            command.Parameters.AddWithValue("@Sample", item.Sample);
                            command.Parameters.AddWithValue("@AuditorQuery", item.AuditorQuery);
                            command.Parameters.AddWithValue("@ManagementResponse", item.ManagementResponse);
                            command.Parameters.AddWithValue("@TargetDate", item.TargetDate);
                            command.Parameters.AddWithValue("@ManagementAction", item.ManagementAction);
                            command.Parameters.AddWithValue("@BranchStatus", item.BranchStatus);
                            command.Parameters.AddWithValue("@AuditorUserId", item.AuditorUserId);
                            command.Parameters.AddWithValue("@AuditorCreatedDate", item.AuditorCreatedDate);
                            command.Parameters.AddWithValue("@AuditorModifiedUserId", item.AuditorModifiedUserId);
                            command.Parameters.AddWithValue("@AuditorModifiedDate", item.AuditorModifiedDate); // assumes the entity exposes AuditorModifiedDate
                            command.Parameters.AddWithValue("@AudityUserIdTargetDate", item.AudityUserIdTargetDate);
                            command.Parameters.AddWithValue("@AudityCreatedDateTargetDate", item.AudityCreatedDateTargetDate);
                            command.Parameters.AddWithValue("@AudityModifiedUserIdTargetDate", item.AudityModifiedUserIdTargetDate);
                            command.Parameters.AddWithValue("@AudityModifiedDateTargetDate", item.AudityModifiedDateTargetDate);
                            command.Parameters.AddWithValue("@AudityUserIdStatus", item.AudityUserIdStatus);
                            command.Parameters.AddWithValue("@AudityCreatedDateStatus", item.AudityCreatedDateStatus);
                            command.Parameters.AddWithValue("@AudityModifiedUserIdStatus", item.AudityModifiedUserIdStatus);
                            command.Parameters.AddWithValue("@AudityModifiedDateStatus", item.AudityModifiedDateStatus);
                            command.Parameters.AddWithValue("@Action", item.Action);
                            command.Parameters.AddWithValue("@ActionUserId", item.UserId);
                            command.Parameters.AddWithValue("@ActionUserIdDate", DateTime.Now);
                            command.Parameters.AddWithValue("@IsActive", true);
                            command.Parameters.AddWithValue("@Confirmity", item.Confirmity);
                            command.Parameters.AddWithValue("@ChangedData", item.ChangedProperties);
                            command.Parameters.AddWithValue("@ActionRoleId", item.RoleId);
                            command.ExecuteNonQuery();
                        }
                    }
                    // Commit the transaction
                    transaction.Commit();
                }
                catch (Exception ex)
                {
                    // Handle exceptions and roll back the transaction
                    transaction.Rollback();
                    ErrorLogEntity error = new ErrorLogEntity();
                    error.ControllerName = "DashBoardService";
                    error.ActionName = "UploadBulkData";
                    error.Exception = ex.Message;
                    error.StackTrace = ex.StackTrace;
                    LogHelper.LogError(error);
                    validationEntity.IsError = true;
                }
            }
        }
    }
    catch (Exception ex)
    {
        ErrorLogEntity error = new ErrorLogEntity();
        error.ControllerName = "DashBoardService";
        error.ActionName = "UploadBulkData";
        error.Exception = ex.Message;
        error.StackTrace = ex.StackTrace;
        LogHelper.LogError(error);
        validationEntity.IsError = true;
    }
    return validationEntity;
}
However, this implementation results in too many database calls, causing performance issues when dealing with a large number of records. I am looking for a more efficient way to upload this data, possibly using bulk operations to reduce the number of database calls and improve performance.
What is the best approach to efficiently upload 1 million data records to a MySQL database using C# while avoiding too many database calls? Are there any libraries or techniques that allow me to perform bulk updates and inserts with a single database call?
I would appreciate any insights or code examples demonstrating how to optimize the data upload process to achieve better performance.
Thank you!
3 Answers
Running 1M+ updates over the wire, one statement at a time, is not recommended. Note that none of this has anything to do with the underlying programming language.
To update big volumes of data on busy systems you can follow this approach: generate all of the UPDATE and INSERT statements into a .sql file, and add a
SELECT SLEEP(5);
sleep interval after (roughly) every 5000 UPDATE statements (this matters less for INSERTs). Your .sql file should look like this:
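Something along these lines (the UPDATE bodies and the progress labels are just placeholders for whatever your generator emits):

UPDATE tblatr SET BranchStatus = 'Closed', ModifiedDate = NOW() WHERE TblAtrId = 1;
-- ... roughly 5000 UPDATE statements ...
SELECT NOW() AS 'tblatr_rows_1_to_5000_done';
SELECT SLEEP(5);
UPDATE tblatr SET BranchStatus = 'Closed', ModifiedDate = NOW() WHERE TblAtrId = 5001;
-- ... the next 5000 UPDATE statements ...
SELECT NOW() AS 'tblatr_rows_5001_to_10000_done';
SELECT SLEEP(5);
-- ... and so on until the end of the file ...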
Please pay attention to the SELECT statements: their column names should reflect the progress of execution, because you’ll be observing that progress in the output. Also, if/when the script breaks at some point, you’ll know where to restart.
The command line for executing the file should look like this:
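Something like this (host, user and password are placeholders; the .sql and .log file names are just the ones used in this example):

mysql -h <host> -u <user> -p<password> <database> --unbuffered < load-tblatr-tblatrhistory.sql > load-tblatr-tblatrhistory.log 2>&1 &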
--unbuffered is the key to observing your script execution in real time; the same goes for the 2>&1 and & operators. This will start, with god’s help, your load job in the background, writing both the SELECT results and any errors to load-tblatr-tblatrhistory.log. The column names in the SELECT results will indicate the load progress.
Watch the script’s progress with something like tail -f load-tblatr-tblatrhistory.log; it will print the SELECT progress markers from your script as they execute.
1M updates with a 5 second delay after every 5000 rows will take around 20 minutes (1,000,000 / 5,000 = 200 pauses of 5 s each, about 17 minutes of sleeping, plus the time for the statements themselves). The 5-second pauses allow other activity to continue during script execution, both on the primary server and on read-only replicas.
If your session disconnects from the server during script execution (your laptop goes to sleep, for example), no problem: the execution is happening in the background in detached mode. But I would recommend disconnecting manually after starting the script, then reconnecting with a new session to watch the logs.
If you use MySqlConnector (disclaimer: I’m the lead author), you can use the MySqlBulkCopy class to perform the bulk copy with one network call. (Note that you will need to add ;AllowLoadLocalInfile=true to your connection string for this to work.) It can’t insert your UpdateStatusEntity objects directly, so you will need to convert them to DataRow objects (or a DataTable) first in order to perform the bulk copy.
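A rough sketch of the shape this takes, reusing data and connectionString from the question (the three columns and their types are illustrative; the real DataTable needs every tblatrhistory column you want to populate):

using System.Data;
using MySqlConnector;

// Sketch only: copy the entities into a DataTable whose columns match the
// destination columns you want to fill (the types here are guesses).
var table = new DataTable();
table.Columns.Add("tblAtrId", typeof(long));
table.Columns.Add("AuditSetId", typeof(long));
table.Columns.Add("ContractNo", typeof(string));
// ... add the remaining columns the same way ...
foreach (var item in data)
{
    table.Rows.Add(item.TblAtrId, item.AuditSetId, item.ContractNo /*, ... */);
}

// The connection string must include AllowLoadLocalInfile=true.
using var connection = new MySqlConnection(connectionString);
connection.Open();

var bulkCopy = new MySqlBulkCopy(connection)
{
    DestinationTableName = "tblatrhistory",
};
// Map each DataTable column (by ordinal) to its destination column by name,
// because the DataTable holds only a subset of the table's columns.
bulkCopy.ColumnMappings.Add(new MySqlBulkCopyColumnMapping(0, "tblAtrId"));
bulkCopy.ColumnMappings.Add(new MySqlBulkCopyColumnMapping(1, "AuditSetId"));
bulkCopy.ColumnMappings.Add(new MySqlBulkCopyColumnMapping(2, "ContractNo"));

// One network round trip (LOAD DATA LOCAL INFILE under the hood) for all rows.
bulkCopy.WriteToServer(table);

MySqlBulkCopy only inserts, so it covers the tblatrhistory part of your method; the tblatr updates still need one of the other approaches.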
You can proceed to "bulk" operations: cache some data (e.g. 10-50 records at a time) in memory arrays and build the call using the strategy described in https://www.tutorialspoint.com/how-to-bulk-update-mysql-data-with-a-single-query.
The query template can be built once (hardcoded) with minor effort; in the loop you only have to manage the pointer into those arrays.
In this way you will certainly speed up the job and, at the same time, ensure those records are committed, avoiding problems with, e.g., a lost connection.
It won’t be a lightning-fast operation, but I believe it will reduce the time spent considerably.
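A rough sketch of that strategy (not the linked article’s exact code), reusing data, connection and transaction from the question: it sends the tblatr changes in batches of 50 rows, one INSERT ... ON DUPLICATE KEY UPDATE statement per batch, and assumes TblAtrId is the primary key (only two columns are shown):

using System.Linq;
using System.Text;
using MySqlConnector; // or MySql.Data.MySqlClient

const int batchSize = 50; // cache a few dozen records at a time, as suggested above
for (int offset = 0; offset < data.Count; offset += batchSize)
{
    var batch = data.Skip(offset).Take(batchSize).ToList();
    var values = new StringBuilder();
    using var command = new MySqlCommand { Connection = connection, Transaction = transaction };

    for (int i = 0; i < batch.Count; i++)
    {
        if (i > 0) values.Append(',');
        values.Append($"(@id{i}, @resp{i}, @status{i})");
        command.Parameters.AddWithValue($"@id{i}", batch[i].TblAtrId);
        command.Parameters.AddWithValue($"@resp{i}", batch[i].ManagementResponse);
        command.Parameters.AddWithValue($"@status{i}", batch[i].BranchStatus);
    }

    // The column list is illustrative; add the remaining columns the same way.
    command.CommandText =
        "INSERT INTO tblatr (TblAtrId, ManagementResponse, BranchStatus) VALUES " + values +
        " ON DUPLICATE KEY UPDATE ManagementResponse = VALUES(ManagementResponse), BranchStatus = VALUES(BranchStatus);";
    command.ExecuteNonQuery(); // one round trip per batch instead of one per row
}

Keep in mind that, unlike a plain UPDATE, this form inserts a new row when a TblAtrId does not already exist, so it only fits when that behaviour (or a guaranteed match on every id) is acceptable.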