I created a small project in winform to display data grid view from mhtml file.
everything works fine. i wanted to add something in my project : when i see an empty column i wanted to remove it if there is one.
i almost did it but i have an error.
here is the code :
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
namespace MHtmlTablesHunter
{
public partial class Form1 : Form
{
private readonly string ConvertedFileName = "page.html";
private readonly List<string> ColumnsToSeparate = new List<string>() { "life limit", "interval" }; // Delete these columns
private readonly List<string> ExtraColumnsToAdd = new List<string>() { "Calendar", "Flight Hours", "Landing" }; // Add these columns
public Form1()
{
InitializeComponent();
this.Text = $"MhtmlTablesHunter v{Application.ProductVersion}";
}
//browse for specific file type , in this case its .mhtml
private void btnBrowse_Click(object sender, EventArgs e)
{
using (OpenFileDialog openFileDialog = new OpenFileDialog())
{
openFileDialog.Title = "Please choose the MHTML file";
openFileDialog.Filter = "MHTML files (*.mhtml)|*.mhtml;"; //the file type specified here
openFileDialog.RestoreDirectory = false;
if (openFileDialog.ShowDialog() == DialogResult.OK)
{
textBoxSourceFile.Text = openFileDialog.FileName;
checkAndExtractTable();
}
}
}
private void checkAndExtractTable()
{
string sourcePath = textBoxSourceFile.Text;
if (!string.IsNullOrEmpty(sourcePath)) //check if the input file path is not empty
{
if (File.Exists(sourcePath)) //check if the input file path is exists
{
Task.Run(async () => await ExtractTable(sourcePath)); //run the extraction process in a thread for the UI to be more responsive
}
else
{
MessageBox.Show("Source file doesn't exist.");
}
}
else
{
MessageBox.Show("Please select the source file.");
}
}
//This part concern the headers, rows and columns
public async Task<string> ExtractTable(string sourcePath)
{
try
{
var doc = new HtmlAgilityPack.HtmlDocument();
var converter = new MimeConverter(); //converter used to convert mhtml file to html
if (File.Exists(ConvertedFileName)) //check if previously converted file is exist
{
File.Delete(ConvertedFileName); //delete the file
}
using (FileStream sourceStream = File.OpenRead(sourcePath))
{
using (FileStream destinationStream = File.Open("page.html", FileMode.Create))
{
await converter.Convert(sourceStream, destinationStream); //convert the file to html, it will be stored in the application folder
}
}
doc.Load(ConvertedFileName); //load the html
//var tables = doc.GetElementbyId("d19924139e528_grid"); //get the 7th table by its id, we can get the table id by inspecting element in the internet explorer
var tables = doc.DocumentNode.SelectNodes("//table"); //get all the tables
HtmlAgilityPack.HtmlNode table = null;
if (tables.Count > 0)
{
table = tables[tables.Count - 1]; //take the last table
}
if (table != null) //if the table exists
{
dataGridView1.Invoke((Action)delegate //we use delegate because we accessing the datagridview from a different thread
{
this.dataGridView1.Rows.Clear();
this.dataGridView1.Columns.Clear();
});
var rows = table.SelectNodes(".//tr"); //get all the rows
var nodes = rows[0].SelectNodes("th|td"); //get the header row values, first item will be the header row always
string LifeLimitColumnName = ColumnsToSeparate.Where(c => nodes.Any(n => n.InnerText.ToLower().Contains(c))).FirstOrDefault();
if (string.IsNullOrWhiteSpace(LifeLimitColumnName))
{
LifeLimitColumnName = "Someunknowncolumn";
}
List<string> headers = new List<string>();
for (int i = 0; i < nodes.Count; i++) // LOOP
{
headers.Add(nodes[i].InnerText);
if (!nodes[i].InnerText.ToLower().Contains(LifeLimitColumnName))
{
dataGridView1.Invoke((Action)delegate
{
dataGridView1.Columns.Add("", nodes[i].InnerText);
});
}
}
int indexOfLifeLimitColumn = headers.FindIndex(h => h.ToLower().Contains(LifeLimitColumnName));
if (indexOfLifeLimitColumn > -1)
{
foreach (var eh in ExtraColumnsToAdd)
{
dataGridView1.Invoke((Action)delegate
{
dataGridView1.Columns.Add("", eh); //add extra header to the datagridview
});
}
}
int[] rowDataCount = new int[dataGridView1.Columns.Count];
Array.Clear(rowDataCount, 0, rowDataCount.Length);
for (int i = 1; i < rows.Count; i++) ///loop through rest of the rows
{
var row = rows[i];
var nodes2 = row.SelectNodes("th|td"); //get all columns in the current row
List<string> values = new List<string>(); //list to store row values
for (int x = 0; x < nodes2.Count; x++)
{
//rowes.Cells[x].Value = nodes2[x].InnerText;
string cellText = nodes2[x].InnerText.Replace(" ", " ");
if (!String.IsNullOrWhiteSpace(cellText)) {
rowDataCount[x]++;
}
values.Add(cellText); //add the cell value in the list
}
// Factory for -> Calendar, Flight Hours, Landings
if (indexOfLifeLimitColumn > -1)
{
values.RemoveAt(indexOfLifeLimitColumn);
string lifeLimitValue = nodes2[indexOfLifeLimitColumn].InnerText.Replace(" ", " ");
string[] splittedValues = lifeLimitValue.Split(' ');
for (int y = 0; y < ExtraColumnsToAdd.Count; y++)
{
if (ExtraColumnsToAdd[y] == "Calendar")
{
string valueToAdd = string.Empty;
string[] times = new string[] { "Years", "Year", "Months", "Month", "Day", "Days" };
if (splittedValues.Any(s => times.Any(t => t == s)))
{
var timeFound = times.Where(t => splittedValues.Any(s => s == t)).FirstOrDefault();
int index = splittedValues.ToList().FindIndex(s => s.Equals(timeFound));
valueToAdd = $"{splittedValues[index - 1]} {timeFound}";
}
values.Add(valueToAdd);
}
else if (ExtraColumnsToAdd[y] == "Flight Hours")
{
string valueToAdd = string.Empty;
if (splittedValues.Any(s => s == "FH"))
{
int index = splittedValues.ToList().FindIndex(s => s.Equals("FH"));
valueToAdd = $"{splittedValues[index - 1]} FH";
}
values.Add(valueToAdd);
}
else
{
string valueToAdd = string.Empty;
if (splittedValues.Any(s => s == "LDG"))
{
int index = splittedValues.ToList().FindIndex(s => s.Equals("LDG"));
valueToAdd = $"{splittedValues[index - 1]} LDG";
}
values.Add(valueToAdd);
}
}
}
dataGridView1.Invoke((Action)delegate
{
this.dataGridView1.Rows.Add(values.ToArray()); //add the list as a row
});
int removedCount = 0;
for (int index = 0; index < rowDataCount.Length; index++) {
if (rowDataCount[index] == 0) {
dataGridView1.Invoke((Action)delegate
{
this.dataGridView1.Columns.RemoveAt(index - removedCount);
removedCount++;
});
}
}
}
}
}
catch (Exception ex)
{
MessageBox.Show(ex.Message);
}
return string.Empty;
}
private void textBoxSourceFile_TextChanged(object sender, EventArgs e)
{
}
}
}
here is the design code of the winform project
{
partial class Form1
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(Form1));
this.textBoxSourceFile = new System.Windows.Forms.TextBox();
this.btnBrowse = new System.Windows.Forms.Button();
this.dataGridView1 = new System.Windows.Forms.DataGridView();
((System.ComponentModel.ISupportInitialize)(this.dataGridView1)).BeginInit();
this.SuspendLayout();
//
// textBoxSourceFile
//
this.textBoxSourceFile.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.textBoxSourceFile.Location = new System.Drawing.Point(51, 38);
this.textBoxSourceFile.Margin = new System.Windows.Forms.Padding(4);
this.textBoxSourceFile.Name = "textBoxSourceFile";
this.textBoxSourceFile.Size = new System.Drawing.Size(903, 30);
this.textBoxSourceFile.TabIndex = 0;
//
// btnBrowse
//
this.btnBrowse.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
this.btnBrowse.Location = new System.Drawing.Point(999, 38);
this.btnBrowse.Margin = new System.Windows.Forms.Padding(4);
this.btnBrowse.Name = "btnBrowse";
this.btnBrowse.Size = new System.Drawing.Size(93, 28);
this.btnBrowse.TabIndex = 1;
this.btnBrowse.Text = "Browse";
this.btnBrowse.UseVisualStyleBackColor = true;
this.btnBrowse.Click += new System.EventHandler(this.btnBrowse_Click);
//
// dataGridView1
//
this.dataGridView1.AllowUserToAddRows = false;
this.dataGridView1.AllowUserToDeleteRows = false;
this.dataGridView1.AllowUserToResizeRows = false;
this.dataGridView1.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.dataGridView1.AutoSizeColumnsMode = System.Windows.Forms.DataGridViewAutoSizeColumnsMode.Fill;
this.dataGridView1.BackgroundColor = System.Drawing.SystemColors.ControlLight;
this.dataGridView1.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
this.dataGridView1.Location = new System.Drawing.Point(51, 118);
this.dataGridView1.Margin = new System.Windows.Forms.Padding(4);
this.dataGridView1.Name = "dataGridView1";
this.dataGridView1.RowHeadersVisible = false;
this.dataGridView1.RowHeadersWidth = 62;
this.dataGridView1.Size = new System.Drawing.Size(1042, 467);
this.dataGridView1.TabIndex = 2;
//this.dataGridView1.CellContentClick += new System.Windows.Forms.DataGridViewCellEventHandler(this.dataGridView1_CellContentClick);
//
// Form1
//
this.AutoScaleDimensions = new System.Drawing.SizeF(12F, 25F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(1136, 636);
this.Controls.Add(this.dataGridView1);
this.Controls.Add(this.btnBrowse);
this.Controls.Add(this.textBoxSourceFile);
this.Font = new System.Drawing.Font("Microsoft Sans Serif", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
this.Icon = ((System.Drawing.Icon)(resources.GetObject("$this.Icon")));
this.Margin = new System.Windows.Forms.Padding(4);
this.MinimumSize = new System.Drawing.Size(1086, 557);
this.Name = "Form1";
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterScreen;
this.Text = "Form1";
((System.ComponentModel.ISupportInitialize)(this.dataGridView1)).EndInit();
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
private System.Windows.Forms.TextBox textBoxSourceFile;
private System.Windows.Forms.Button btnBrowse;
private System.Windows.Forms.DataGridView dataGridView1;
}
}
When i run it, i have this error
2
Answers
Finally i did it thanks a lot for everyone. Caius jayd you're right. I follow your intruction. it's working now. Here is the code :
Possibly you have more columns in your array than your grid – I can’t see anything that removes a column after you declare the array, but perhaps it would be better to key the removing of a column off of whether the actual grid column has data rather than a memory of whether you put any data in it. Let’s ask the grid "for every column, do all rows for that column have no value? remove the column":
Side note, my column removing loop runs in reverse instead, because it means that removing column index
i
only affects columns> i
, but we’ve already processed them. It doesn’t affect indexing of column indexes we have yet to process that are< i
so we dont need to mess around with indexes.TLDR: If you’re removing things from a collection you’re looping over, looping in reverse can make things simpler