
I have to upload a 90 MB CSV file and then plot it as a chart with Chart.js. The CSV file contains measured values recorded once per minute, so 90 MB is almost a year's worth of data. I have already raised the page's response timeout to a high value, but my code still grinds to a halt. As a workaround I only display a fixed number of data points at a time and page through the chart in intervals. Even that is very slow and not pretty. For the evaluation, at least a monthly overview would be nicer, but I have no idea what other adjustments I could make. Do you have any ideas?

HTML

<!DOCTYPE html>
<html lang="de">
   <head>
      <meta charset="UTF-8">
      <meta name="viewport" content="width=device-width, initial-scale=1.0">
      <title>CSV Diagramm mit Chart.js</title>
      <link rel="stylesheet" href="styles.css">
   </head>
   <body>
      <div id="drop-area" class="drop-area" style="width: 100%;" ondrop="handleDrop(event)" ondragover="handleDragOver(event)">
         <p>Datei hier ablegen</p>
         <input type="file" id="csvFileInput" accept=".csv" style="display:none;" onchange="handleUpload()">
      </div>
      <div class="chart-container" style="width: 100%;">
         <canvas id="myChart"></canvas>
      </div>
      <button onclick="showPreviousData()">Vorheriger Tag</button>
      <button onclick="showNextData()">Nächster Tag</button>
      <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
      <script src="https://cdnjs.cloudflare.com/ajax/libs/hammer.js/2.0.8/hammer.min.js" integrity="sha512-UXumZrZNiOwnTcZSHLOfcTs0aos2MzBWHXOHOuB0J/R44QB0dwY5JgfbvljXcklVf65Gc4El6RjZ+lnwd2az2g==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
      <script src="https://cdnjs.cloudflare.com/ajax/libs/chartjs-plugin-zoom/2.0.1/chartjs-plugin-zoom.min.js" integrity="sha512-wUYbRPLV5zs6IqvWd88HIqZU/b8TBx+I8LEioQ/UC0t5EMCLApqhIAnUg7EsAzdbhhdgW07TqYDdH3QEXRcPOQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
      <script src="script.js"></script>
   </body>
</html>

JS

let startIndex = 0;
const displayCount = 1440;
let labels = [];
let datasets = [];
let originalDatasetVisibility = [];

function handleUpload() {
    const fileInput = document.getElementById('csvFileInput');
    const file = fileInput.files[0];
    handleFile(file);
}

function processData(csvData) {
    const rows = csvData.split('\n');
    labels = [];
    datasets = [];
    originalDatasetVisibility = [];

    const colors = ['rgba(255, 0, 0, 1)', 'rgba(0, 255, 0, 1)', 'rgba(255, 255, 0, 1)', 'rgba(0, 0, 255, 1)'];

    const columns = rows[0].split(';');

    for (let i = 1; i < columns.length; i++) {
        const data = [];
        const currentLabel = columns[i];
        const color = colors[i - 1];

        for (let j = 1; j < rows.length; j++) {
            const cols = rows[j].split(';');
            if (i === 1) {
                labels.push(cols[0]);
            }
            data.push(parseFloat(cols[i]));
        }

        const dataset = {
            label: currentLabel,
            data: data,
            backgroundColor: color,
            borderColor: color,
            fill: false,
            borderWidth: 1,
            pointRadius: 1,
        };

        datasets.push(dataset);
        originalDatasetVisibility.push(true);
    }

    createChart(labels.slice(startIndex, startIndex + displayCount), datasets, function() {
        console.log('Diagramm wurde erstellt');
    });
}

function createChart(labels, datasets, callback) {
    const chartContainer = document.querySelector('.chart-container');
    const canvasElement = document.getElementById('myChart');

    if (canvasElement) {
        chartContainer.removeChild(canvasElement);
    }

    chartContainer.innerHTML = '<canvas id="myChart"></canvas>';

    const ctx = document.getElementById('myChart').getContext('2d');
    window.myChart = new Chart(ctx, {
        type: 'line',
        data: {
            labels: labels,
            datasets: datasets.map((dataset, index) => ({
                ...dataset,
                data: dataset.data.slice(startIndex, startIndex + displayCount),
                hidden: !originalDatasetVisibility[index],
            })),
        },
        options: {
            scales: {
                x: {
                    stacked: true,
                    min: labels[startIndex],
                    max: labels[startIndex + displayCount - 1],
                },
                y: {},
            },
            plugins: {
                zoom: {
                    pan: {
                        enabled: true,
                        mode: 'x'
                    },
                    zoom: {
                        wheel: {
                            enabled: true,
                        },
                        pinch: {
                            enabled: true
                        },
                        mode: 'x',
                    }
                }
            }
        }
    });

    if (callback && typeof callback === 'function') {
        callback();
    }

    window.myChart.resetZoom();
    window.myChart.ctx.canvas.addEventListener('wheel', handleZoom);
}

function handleZoom(event) {
    const chart = window.myChart;
    const chartArea = chart.chartArea;
    const originalDatasets = chart.data.datasets;

    const zoomEnabled = chart.options.plugins.zoom.zoom.wheel.enabled;
    const deltaY = event.deltaY;

    if (zoomEnabled && deltaY !== 0) {
        const deltaMode = event.deltaMode;
        const scaleDelta = deltaY > 0 ? 0.9 : 1.1;

        let newMinIndex = chart.getDatasetMeta(0).data.findIndex(
            (d) => d.x >= chartArea.left
        );
        let newMaxIndex = chart.getDatasetMeta(0).data.findIndex(
            (d) => d.x >= chartArea.right
        );

        if (deltaMode === 0) {
            newMinIndex = Math.max(0, newMinIndex - Math.abs(deltaY));
            newMaxIndex = Math.min(
                originalDatasets[0].data.length - 1,
                newMaxIndex + Math.abs(deltaY)
            );
        } else if (deltaMode === 1) {
            newMinIndex = Math.max(0, newMinIndex - Math.abs(deltaY) * 10);
            newMaxIndex = Math.min(
                originalDatasets[0].data.length - 1,
                newMaxIndex + Math.abs(deltaY) * 10
            );
        }

        const newMinLabel = originalDatasets[0].data[newMinIndex].label;
        const newMaxLabel = originalDatasets[0].data[newMaxIndex].label;

        chart.options.scales.x.min = newMinLabel;
        chart.options.scales.x.max = newMaxLabel;

        chart.update();
    }
}

function handleFile(file) {
    if (file) {
        const reader = new FileReader();

        reader.onload = function (e) {
            const csvData = e.target.result;
            processData(csvData);
        };

        reader.readAsText(file);
    } else {
        alert('Bitte eine CSV-Datei auswählen.');
    }
}

function handleDrop(event) {
    event.preventDefault();
    const file = event.dataTransfer.files[0];
    handleFile(file);
}

function handleDragOver(event) {
    event.preventDefault();
}

function showPreviousData() {
    if (startIndex - displayCount >= 0) {
        startIndex -= displayCount;
        updateChart();
    }
}

function showNextData() {
    if (startIndex + displayCount < labels.length) {
        startIndex += displayCount;
        updateChart();
    }
}

function updateChart() {
    const endIndex = Math.min(startIndex + displayCount, labels.length);
    const updatedLabels = labels.slice(startIndex, endIndex);
    const updatedDatasets = datasets.map((dataset, index) => ({
        ...dataset,
        data: dataset.data.slice(startIndex, endIndex),
        hidden: !originalDatasetVisibility[index],
    }));

    window.myChart.data.labels = updatedLabels;
    window.myChart.data.datasets = updatedDatasets;
    window.myChart.options.scales.x.min = updatedLabels[0];
    window.myChart.options.scales.x.max = updatedLabels[updatedLabels.length - 1];

    window.myChart.update();
}

function removeZoomEventListener() {
    window.myChart.ctx.canvas.removeEventListener('wheel', handleZoom);
}

2 Answers


  1. Well, it depends on your use case: how the data has to be visualized, how exact the representation has to be, and how fast it needs to be.

    Basically there are two points of attack: "shrink" the data and keep the client-side workload low.

    That said, here are some tips to improve the performance:

    ChartJs related

    1. Check out the official Chart.js "Performance Tips and Tricks"

      • On my local high-data example (~100 MB): "Disable Animations", "Disable Point Drawing", "Enable spanGaps" and "Specify min and max for scales" drastically improved the performance, just to name a few (you will have to tweak the options for the best balance of speed and visual appeal). A minimal options sketch follows this list.

      Image 1: Minor tweaks, no fixed scales ~ 70 sec (5,255,999 data rows)

      Image 2: Minor tweaks + fixed scaling ~ 3–4 sec (5,255,999 data rows)

    2. Prepare the data for easy use

      • Send parsed and sorted JSON data instead of CSV that you manually parse on the client side (the file might be bigger, but Chart.js won't need to re-parse and normalize the data on the client side).
      • Aggregate / pre-calculate / clean the data before you send it to the user.
    3. You can load a small chunk of data first, load the rest asynchronously and update the chart data as it arrives; check out this update chart example. (A sketch of this closes this answer.)
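
    For reference, here is a minimal options sketch for point 1 (the option names are standard Chart.js v3/v4 options; the fixed y-range is a placeholder you would derive from your own data):

    const ctx = document.getElementById('myChart').getContext('2d');
    window.myChart = new Chart(ctx, {
        type: 'line',
        data: { labels: labels, datasets: datasets },
        options: {
            animation: false,          // "Disable Animations"
            spanGaps: true,            // "Enable spanGaps"
            elements: {
                point: { radius: 0 },  // "Disable Point Drawing"
            },
            scales: {
                // "Specify min and max for scales" so Chart.js can skip auto-ranging
                x: { min: labels[0], max: labels[labels.length - 1] },
                y: { min: 0, max: 100 },  // placeholder range, adjust to your data
            },
        },
    });

    With a fixed y-range Chart.js does not have to scan every data point to compute the scale, which is presumably where most of the difference between the two images above comes from.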

    On the Data/Web side

    1. First of all, analyze your data
      • Do you have to send all the columns for one single chart? If not, remove unneeded columns and rows.
      • As seen in the images above, Chart.js "hides" some values to fit the chart on the canvas; if you know this, just send the "visible" values.
    2. If 100% accuracy is not needed, remove some rows and let Chart.js fill in the gaps (a pre-aggregation sketch follows this list).
    3. Only send data at the maximum resolution needed, and split the data into multiple files, especially if you only need a subset for a specific data resolution and/or if the data is used in different charts.
    4. Load the data asynchronously, and display it once it is loaded.
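
    Since the question mentions a monthly overview: here is a minimal sketch of pre-aggregating the per-minute values into daily averages before charting (the aggregateDaily helper is hypothetical, and the bucketing assumes timestamp labels that start with a YYYY-MM-DD date; adjust it to your actual label format):

    // Collapse per-minute values into one averaged point per day.
    function aggregateDaily(labels, values) {
        const buckets = new Map();  // day string -> { sum, count }
        for (let i = 0; i < labels.length; i++) {
            const day = labels[i].slice(0, 10);  // assumed "YYYY-MM-DD ..." prefix
            const b = buckets.get(day) || { sum: 0, count: 0 };
            b.sum += values[i];
            b.count++;
            buckets.set(day, b);
        }
        return {
            labels: [...buckets.keys()],
            data: [...buckets.values()].map(b => b.sum / b.count),
        };
    }

    Roughly 525,000 per-minute rows collapse to about 365 points per series, which Chart.js renders without any paging.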

    Bonus tip: if you don't have to use Chart.js, check out this SO question/answer here; it recommends using Highcharts instead of Chart.js for big data.
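
    And the sketch promised under point 3 of the Chart.js-related tips: render a first chunk, then fetch the remaining chunks asynchronously and append them via chart.update() (the chunked endpoint data/chunk-<i>.json is purely hypothetical; the update pattern itself is the documented way to add data to a Chart.js chart):

    // Append pre-split chunks to an already created chart, one by one.
    async function loadRemainingChunks(chart, chunkCount) {
        for (let i = 1; i < chunkCount; i++) {                // chunk 0 was rendered initially
            const chunk = await fetch(`data/chunk-${i}.json`) // hypothetical endpoint
                .then((response) => response.json());
            chart.data.labels.push(...chunk.labels);
            chart.data.datasets.forEach((ds, idx) => ds.data.push(...chunk.series[idx]));
            chart.update('none');                             // redraw without animation
        }
    }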

  2. After writing my last answer, I tried to optimize your (long) code and noticed:

    1. the upload is handled only on the client side
    2. and, probably the main issue (without knowing your data), the parsing of the CSV data (with my test dataset it takes > 8 sec just to parse the CSV file; the chart rendering itself is "fast")

    So sadly my last answer would not be very helpful in your specific case, so here is a more tailored solution.

    So, what could be a solution:

    1. If you can define the file upload format, try to make it a JSON file that can be used right away, with JSON.parse or so (a minimal sketch of this is further down in this answer).
    2. If it has to be a CSV file, maybe you can change the structure: simply transpose the data so that you don't have to iterate over it so many times (one line/row is one dataset, so to speak).
    3. If all that is not possible and you need to use that file type and structure, you could rewrite your for-loops to something like this:
    function processData(csvData) {
        const colors = [ 'rgba(255, 0, 0, 1)', 'rgba(0, 255, 0, 1)',
            'rgba(255, 255, 0, 1)', 'rgba(0, 0, 255, 1)' ];
        const rows = csvData.split('\n');
        const columns = rows[0].split(';');
        labels = [];
        // '1'.repeat(n).split('') is just a compact way to create an array with one
        // entry per data column (columns.length - 1 entries)
        originalDatasetVisibility = '1'.repeat(columns.length - 1).split('');
        datasets = '1'.repeat(columns.length - 1).split('').map( (x, idx) => ({
                data: [],
                backgroundColor: colors[idx],
                borderColor: colors[idx],
                fill: false,
                borderWidth: 1,
                pointRadius: 0,  // minor performance tweak
                spanGaps: true,  // minor performance tweak
            })
        );
        for (let rowIdx = 1; rowIdx < rows.length; rowIdx++){
            let cols = rows[rowIdx].split(';');
            for(let colIdx = 1; colIdx < cols.length; colIdx++){
                if (colIdx === 1) {
                    labels.push(cols[0]);
                }
                datasets[colIdx - 1].label = columns[colIdx];
                datasets[colIdx - 1].data.push(cols[colIdx]);
            }
        }
        createChart(labels.slice(startIndex, startIndex + displayCount), datasets,
            () => console.log('Diagramm wurde erstellt')
        );
    }
    

    In the original code the split function is called columns × rows times (in my case 1,279,995 times), and it is a pretty "expensive" operation, so just changing the order of the loops improves the time-to-render from > 8 sec to < 3 sec (for my test dataset).
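
    For comparison, the JSON variant from point 1: if the upload were already a JSON file shaped like the chart data (this structure is an assumption, not an existing format of yours), the processing shrinks to roughly:

    // Assumed upload shape: { "labels": [...], "datasets": [{ "label": ..., "data": [...] }, ...] }
    function processJsonData(jsonText) {
        const parsed = JSON.parse(jsonText);  // no manual splitting or looping needed
        labels = parsed.labels;
        datasets = parsed.datasets;
        originalDatasetVisibility = datasets.map(() => true);
        createChart(labels.slice(startIndex, startIndex + displayCount), datasets,
            () => console.log('Diagramm wurde erstellt')
        );
    }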

    There are probably more optimizations to be done if you want to invest more time, but this was the glaring issue for me.

    Here is the resulting chart of my test dataset:
    Screenshot: test dataset
