I am working with React and Node.js, and we have a scenario where we have to upload a 10 GB file to AWS. I know that cannot be done with a single request, which is why I divided the file into chunks, but it still fails and throws an error after a couple of chunk uploads. Here is the code:
Frontend
import React, { useState, useEffect } from "react";
import { UploadDropzone } from "../../../components/icons";
import { Progress } from "antd";
import Dropzone from "react-dropzone";
import axios from "axios";
import { notification } from "antd";
import { isEmptyArray } from "formik";
const chunkSize = 1048576 * 25;
function UploadLocalVideo(props) {
const { setUploadUrl, storageId, acceptFileType, isResourceBucket, callBackFun } = props;
const [progress, setProgress] = useState(0);
const [beginingOfTheChunk, setBeginingOfTheChunk] = useState(0);
const [endOfTheChunk, setEndOfTheChunk] = useState(chunkSize);
const [fileToBeUpload, setFileToBeUpload] = useState({});
const [progressUpload, setProgressUpload] = useState(0);
const [fileSize, setFileSize] = useState(0);
const [chunkCount, setChunkCount] = useState(0);
const [uploadId, setUploadId] = useState("");
const [name, setName] = useState("");
const [parts, setParts] = useState([]);
const [counter, setCounter] = useState(1);
const [percent, setPercent] = useState(0);
const [fileType, setFileType] = useState("");
const [uploading, setUploading] = useState(false);
const onUpload = async (files) => {
if (!isEmptyArray(files)) {
setUploading(true);
}
let percent = 0;
let name = Math.random().toString(36).substring(2, 10);
resetChunkProperties();
const _file = files[0];
name = (name + _file?.name?.replace(/ +/g, "")).replace(/[{()}]/g, "");
setName(name);
setFileType(_file.type);
setFileSize(_file.size);
const _totalCount = _file.size % chunkSize == 0 ? _file.size / chunkSize : Math.floor(_file.size / chunkSize) + 1;
setChunkCount(_totalCount);
percent = 100 / _totalCount;
setPercent(percent);
setFileToBeUpload(_file);
setProgress(1);
};
const resetChunkProperties = () => {
setProgressUpload(0);
setCounter(1);
setBeginingOfTheChunk(0);
setEndOfTheChunk(chunkSize);
setUploadId("");
setName("");
setParts([]);
setPercent(0);
setProgress(0);
setFileType("");
};
useEffect(() => {
if (fileSize > 0) {
fileUpload(counter);
}
}, [fileToBeUpload, progressUpload]);
const fileUpload = () => {
setCounter(counter + 1);
if (counter <= chunkCount) {
var chunk = fileToBeUpload.slice(beginingOfTheChunk, endOfTheChunk);
uploadChunk(chunk);
}
};
const uploadChunk = async (chunk) => {
try {
const formData = new FormData();
formData.append("file", chunk);
formData.append("name", name);
formData.append("fileType", fileType);
formData.append("chunkSize", chunk.size);
formData.append("currentIndex", counter);
formData.append("totalChunk", chunkCount);
formData.append("uploadId", uploadId);
formData.append("EtagArray", JSON.stringify(parts));
formData.append("storageId", storageId);
formData.append("isResourceBucket", isResourceBucket);
await axios({
method: "post",
url: `${process.env.REACT_APP_NEW_API_HOSTNAME}/upload-chunk`,
data: formData,
}).then((response) => {
if (response.data.uploadStatus == "uploading") {
setBeginingOfTheChunk(endOfTheChunk);
setEndOfTheChunk(endOfTheChunk + chunkSize);
setUploadId(response.data.uploadId);
setParts([...parts, response.data.etag]);
setProgress(parseInt((progressUpload + 1) * percent));
setProgressUpload(progressUpload + 1);
} else if (response.data.uploadStatus == "complete") {
setUploadUrl(response.data.url); // set url or response url
callBackFun(fileToBeUpload);
setProgress(100);
setUploading(false);
} else if (response.data.uploadStatus == "failed" || response.data.status == false) {
notification["error"]({ message: response.data.message });
setProgress(0);
setUploading(false);
} else if (response.data.success == false) {
notification["error"]({ message: "Storage not found" });
setProgress(0);
setUploading(false);
}
});
} catch (error) {
console.log(error, "error");
}
};
return (
<div className="form-group">
<Dropzone
onDrop={(acceptedFiles) => {
onUpload(acceptedFiles);
}}
accept={acceptFileType}
disabled={uploading}
>
{({ getRootProps, getInputProps }) => (
<div className="dropzone">
<div className="dropzone-inner" {...getRootProps()}>
<input {...getInputProps()} />
<div className="dropzone-icon">
<UploadDropzone />
</div>
<div className="dropzone-title">Upload a File</div>
<div className="dropzone-subtitle">
Click to <u>browse</u>, or drag & drop your file here
</div>
<Progress strokeLinecap="butt" type="line" percent={progress} /> {progress > 1 ? `${progress} %` : ""}
</div>
</div>
)}
</Dropzone>
</div>
);
}
export default UploadLocalVideo;
Backend
const handler = async (request, reply) => {
try {
let uploadId = (_.get(request.payload, "uploadId", ""));
let fileName = (_.get(request.payload, "name", ""));
let multiParts = JSON.parse(_.get(request.payload, "EtagArray", []))
let storageId = _.get(request, "payload.storageId", "")
let dataBuffer = Buffer.from(request.payload.file)
let currentChunkIndex = parseInt(request.payload.currentIndex);
let totalChunk = parseInt(request.payload.totalChunk);
let isResourceBucket = JSON.parse(_.get(request, "payload.isResourceBucket", false))
let region = ""
let credentials = {}
let squery = {
name: { $in: ["aws"] }
}
if (isResourceBucket && storageId == "") {
squery._id = mongoose.Types.ObjectId("62e112750e3d4dada1b9a3c0")
} else {
squery._id = mongoose.Types.ObjectId(storageId)
}
if (currentChunkIndex <= totalChunk) {
let storages = await Storage.findOne(squery)
if (storages && storages.credentials) {
credentials = await StreamService.decrypt_storage_credentials(storages.credentials, 'aws')
region = (credentials.region).replace("s3.", "").replace(".amazonaws.com", "").replace("s3-", "")
} else {
return reply({
status: false,
message: 'Storage not found',
uploadStatus: "failed"
})
}
}
AWS.config.update({
accessKeyId: credentials.access_key,
secretAccessKey: credentials.secret_key,
region: region
})
const s3 = new AWS.S3({
params: {
Bucket: credentials.bucket_name,
},
// endpoint,
signatureVersion: 'v4',
region: region,
apiVersion: '2006-03-01'
})
let filename = `uploadFile/${fileName}`;
if (currentChunkIndex == 1) {
uploadId = await getUploadId(filename, s3, credentials.bucket_name)
console.log("currentChunkIndex", " == ", currentChunkIndex, { uploadId })
}
if (currentChunkIndex < totalChunk) {
let etag = await uploadParts(filename, credentials.bucket_name, dataBuffer, currentChunkIndex, uploadId, s3)
return reply({
status: true,
uploadId,
etag,
uploadStatus: "uploading",
message: "uploading"
})
} else if (currentChunkIndex == totalChunk) {
let finalChunk = await uploadParts(filename, credentials.bucket_name, dataBuffer, currentChunkIndex, uploadId, s3)
let etag = { ETag: finalChunk.ETag, PartNumber: currentChunkIndex };
multiParts.push(etag)
let location = await completeFileMultiPartUpload(filename, credentials.bucket_name, uploadId, multiParts, s3, credentials.cdn_suffix);
location = location.replace("%2F","/")
console.log({ location })
if (location) {
return reply({
status: true,
url: location,
uploadStatus: "complete",
message: "upload completed"
})
}
}
} catch (error) {
logger.error(error)
return reply({
success: false,
message: error.message
});
}
};
These are the logs
So I have a couple of questions. 1. When we upload the 10 GB file, does it consume Node's memory? If "Yes", then what is the solution?
Thanks
2 Answers
Node’s Heap Memory
Without streaming chunks of the upload stream to storage on the server as the chunks arrive, you will only be able to upload a file that fits into Node's heap memory. The "upload stream" here is the request object in Node and/or Express, which is a readable stream.
Links on how to increase heap memory are on the web, but I couldn't find clear information on the nodejs.org site. You would also need more than 10 GB of free memory available on the server.
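As an illustration of that idea, here is a minimal sketch in plain Node that pipes the request stream straight to disk; the /upload route and the output filename are placeholders:

const http = require("http");
const fs = require("fs");
const path = require("path");

http.createServer((req, res) => {
  if (req.method === "POST" && req.url === "/upload") {
    // The request is a readable stream; piping it to disk keeps memory use
    // flat no matter how large the upload is.
    const target = path.join(__dirname, `upload-${Date.now()}.bin`);
    const out = fs.createWriteStream(target);
    req.pipe(out);

    out.on("finish", () => {
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ saved: target }));
    });
    out.on("error", (err) => {
      res.writeHead(500);
      res.end(err.message);
    });
  } else {
    res.writeHead(404);
    res.end();
  }
}).listen(3000);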
FormData upload from the front end
The most common workaround that I found when researching the topic was to upload the file as part of a FormData object, which will still need to stream the file content to disk on the server when uploading large files; a minimal sketch of that server side follows.
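For illustration, a minimal sketch of that server side using Express with busboy (one streaming multipart parser among several; formidable or multer's disk storage behave similarly). The route, file paths, and the busboy 1.x API are assumptions of this sketch:

const express = require("express");
const busboy = require("busboy"); // assuming busboy 1.x
const fs = require("fs");
const path = require("path");

const app = express();

app.post("/upload", (req, res) => {
  const bb = busboy({ headers: req.headers });

  bb.on("file", (fieldName, fileStream, info) => {
    // Write the incoming file part to disk as it arrives instead of buffering it.
    const target = path.join(__dirname, `upload-${Date.now()}-${info.filename}`);
    fileStream.pipe(fs.createWriteStream(target));
  });

  bb.on("close", () => res.json({ status: "stored" }));
  bb.on("error", (err) => res.status(500).json({ message: err.message }));

  // Pipe the request into the parser so no large buffer is held in heap memory.
  req.pipe(bb);
});

app.listen(3000);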
Sending file data as the request body
A less common solution is to upload the file as the request body using fetch (or the XMLHttpRequest object). The uploaded data will still need to be streamed to local storage on the server. This method did not work for Node versions prior to 14.1, which would run out of heap space despite streaming the request stream to disk on the fly (updating Node fixed the problem).
Uploading the file directly as binary data in the body also requires a fairly complex init object supplied to fetch on the front end, including headers: Content-Disposition for the file name, Content-Type for the file type, Last-Modified for the date modified, and, in a local implementation of mine, a Payload-Length in octets to announce the file size before the request body is sent, for error control and security purposes. A sketch of such a call appears at the end of this answer.
Note this answer is directed to the questions numbered 1 and 2 in the post about Node – without reference to AWS or React, with which I claim no experience. It is also possible the posted error is occurring before running out of memory.
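For reference, the kind of fetch call described above might look roughly like this; the header names follow the description in this answer (Payload-Length is that local implementation's custom header, not a standard one), and the /upload URL is a placeholder:

// Send the raw file as the request body; the browser streams the File/Blob
// itself rather than JavaScript reading it into memory first.
async function uploadFile(file) {
  const response = await fetch("/upload", {
    method: "POST",
    headers: {
      "Content-Disposition": `attachment; filename="${encodeURIComponent(file.name)}"`,
      "Content-Type": file.type || "application/octet-stream",
      "Last-Modified": new Date(file.lastModified).toUTCString(),
      // Custom header: file size in octets, for error control on the server.
      "Payload-Length": String(file.size),
    },
    body: file,
  });
  if (!response.ok) throw new Error(`Upload failed with status ${response.status}`);
  return response;
}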
AWS Lambda does not support invocations with a payload larger than 6 MB.
You can check this and other limits on this AWS Lambda documentation page under "Function configuration, deployment and execution". Also note that AWS S3 has its own limits for multipart upload, as you can see on this AWS S3 documentation page.
From your frontend code it seems like your file upload chunks are set to 25 MB each, which far surpasses this limit.
To answer your first question: yes, the uploaded file data is allocated in the function's memory when you upload it to S3 through Lambda – although that does not seem to be the problem here.
A possible solution would be setting your chunk size close to 5 MB (~1048576 * 5), but not exactly 6 MB, as the rest of your HTTP request data also counts toward the size limit.
And finally, from a best-practices perspective, AWS Lambda isn't well suited for file uploads, especially large ones. Uploading your files directly to AWS S3 is supported and encouraged by Amazon through S3 pre-signed URLs. Your Lambda function would only be responsible for generating the pre-signed URLs for each chunk and returning them to your frontend, which would also inherently reduce its computing costs.
You can check this amazing blog post from AWS itself describing how to achieve that.
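As a rough sketch of that approach (assuming the AWS SDK for JavaScript v2, an already-created multipart upload, and placeholder bucket and expiry values), the Lambda would only sign an UploadPart URL per chunk and the frontend would PUT each chunk directly to S3:

const AWS = require("aws-sdk");

const s3 = new AWS.S3({ signatureVersion: "v4" });

// Example Lambda handler that returns a pre-signed UploadPart URL for one chunk.
exports.handler = async (event) => {
  const { key, uploadId, partNumber } = JSON.parse(event.body);

  const url = await s3.getSignedUrlPromise("uploadPart", {
    Bucket: "my-upload-bucket", // placeholder bucket name
    Key: key,
    UploadId: uploadId,
    PartNumber: partNumber,
    Expires: 15 * 60, // URL validity in seconds
  });

  // The frontend PUTs the chunk body to this URL; S3 returns the part's ETag
  // in the response headers, collected later for completeMultipartUpload.
  return { statusCode: 200, body: JSON.stringify({ url }) };
};

The bucket would also need a CORS rule allowing PUT from your frontend's origin, and the createMultipartUpload and completeMultipartUpload calls would still be issued server-side.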