I am working with React and Node.js, and we have a scenario where we have to upload a 10 GB file to AWS. I know that cannot be done with a single request, which is why I divided the file into chunks, but it still fails and throws an error after a couple of chunk uploads. Here is the code:
Frontend
import React, { useState, useEffect } from "react";
import { UploadDropzone } from "../../../components/icons";
import { Progress } from "antd";
import Dropzone from "react-dropzone";
import axios from "axios";
import { notification } from "antd";
import { isEmptyArray } from "formik";
const chunkSize = 1048576 * 25;
function UploadLocalVideo(props) {
const { setUploadUrl, storageId, acceptFileType, isResourceBucket, callBackFun } = props;
const [progress, setProgress] = useState(0);
const [beginingOfTheChunk, setBeginingOfTheChunk] = useState(0);
const [endOfTheChunk, setEndOfTheChunk] = useState(chunkSize);
const [fileToBeUpload, setFileToBeUpload] = useState({});
const [progressUpload, setProgressUpload] = useState(0);
const [fileSize, setFileSize] = useState(0);
const [chunkCount, setChunkCount] = useState(0);
const [uploadId, setUploadId] = useState("");
const [name, setName] = useState("");
const [parts, setParts] = useState([]);
const [counter, setCounter] = useState(1);
const [percent, setPercent] = useState(0);
const [fileType, setFileType] = useState("");
const [uploading, setUploading] = useState(false);
const onUpload = async (files) => {
if (!isEmptyArray(files)) {
setUploading(true);
}
let percent = 0;
let name = Math.random().toString(36).substring(2, 10);
resetChunkProperties();
const _file = files[0];
name = (name + _file?.name?.replace(/ +/g, "")).replace(/[{()}]/g, "");
setName(name);
setFileType(_file.type);
setFileSize(_file.size);
const _totalCount = _file.size % chunkSize == 0 ? _file.size / chunkSize : Math.floor(_file.size / chunkSize) + 1;
setChunkCount(_totalCount);
percent = 100 / _totalCount;
setPercent(percent);
setFileToBeUpload(_file);
setProgress(1);
};
const resetChunkProperties = () => {
setProgressUpload(0);
setCounter(1);
setBeginingOfTheChunk(0);
setEndOfTheChunk(chunkSize);
setUploadId("");
setName("");
setParts([]);
setPercent(0);
setProgress(0);
setFileType("");
};
useEffect(() => {
if (fileSize > 0) {
fileUpload(counter);
}
}, [fileToBeUpload, progressUpload]);
const fileUpload = () => {
setCounter(counter + 1);
if (counter <= chunkCount) {
var chunk = fileToBeUpload.slice(beginingOfTheChunk, endOfTheChunk);
uploadChunk(chunk);
}
};
const uploadChunk = async (chunk) => {
try {
const formData = new FormData();
formData.append("file", chunk);
formData.append("name", name);
formData.append("fileType", fileType);
formData.append("chunkSize", chunk.size);
formData.append("currentIndex", counter);
formData.append("totalChunk", chunkCount);
formData.append("uploadId", uploadId);
formData.append("EtagArray", JSON.stringify(parts));
formData.append("storageId", storageId);
formData.append("isResourceBucket", isResourceBucket);
await axios({
method: "post",
url: `${process.env.REACT_APP_NEW_API_HOSTNAME}/upload-chunk`,
data: formData,
}).then((response) => {
if (response.data.uploadStatus == "uploading") {
setBeginingOfTheChunk(endOfTheChunk);
setEndOfTheChunk(endOfTheChunk + chunkSize);
setUploadId(response.data.uploadId);
setParts([...parts, response.data.etag]);
setProgress(parseInt((progressUpload + 1) * percent));
setProgressUpload(progressUpload + 1);
} else if (response.data.uploadStatus == "complete") {
setUploadUrl(response.data.url); // set url or response url
callBackFun(fileToBeUpload);
setProgress(100);
setUploading(false);
} else if (response.data.uploadStatus == "failed" || response.data.status == false) {
notification["error"]({ message: response.data.message });
setProgress(0);
setUploading(false);
} else if (response.data.success == false) {
notification["error"]({ message: "Storage not found" });
setProgress(0);
setUploading(false);
}
});
} catch (error) {
console.log(error, "error");
}
};
return (
<div className="form-group">
<Dropzone
onDrop={(acceptedFiles) => {
onUpload(acceptedFiles);
}}
accept={acceptFileType}
disabled={uploading}
>
{({ getRootProps, getInputProps }) => (
<div className="dropzone">
<div className="dropzone-inner" {...getRootProps()}>
<input {...getInputProps()} />
<div className="dropzone-icon">
<UploadDropzone />
</div>
<div className="dropzone-title">Upload a File</div>
<div className="dropzone-subtitle">
Click to <u>browse</u>, or drag & drop your file here
</div>
<Progress strokeLinecap="butt" type="line" percent={progress} /> {progress > 1 ? `${progress} %` : ""}
</div>
</div>
)}
</Dropzone>
</div>
);
}
export default UploadLocalVideo;
Backend
const handler = async (request, reply) => {
try {
let uploadId = (_.get(request.payload, "uploadId", ""));
let fileName = (_.get(request.payload, "name", ""));
let multiParts = JSON.parse(_.get(request.payload, "EtagArray", []))
let storageId = _.get(request, "payload.storageId", "")
let dataBuffer = Buffer.from(request.payload.file)
let currentChunkIndex = parseInt(request.payload.currentIndex);
let totalChunk = parseInt(request.payload.totalChunk);
let isResourceBucket = JSON.parse(_.get(request, "payload.isResourceBucket", false))
let region = ""
let credentials = {}
let squery = {
name: { $in: ["aws"] }
}
if (isResourceBucket && storageId == "") {
squery._id = mongoose.Types.ObjectId("62e112750e3d4dada1b9a3c0")
} else {
squery._id = mongoose.Types.ObjectId(storageId)
}
if (currentChunkIndex <= totalChunk) {
let storages = await Storage.findOne(squery)
if (storages && storages.credentials) {
credentials = await StreamService.decrypt_storage_credentials(storages.credentials, 'aws')
region = (credentials.region).replace("s3.", "").replace(".amazonaws.com", "").replace("s3-", "")
} else {
return reply({
status: false,
message: 'Storage not found',
uploadStatus: "failed"
})
}
}
AWS.config.update({
accessKeyId: credentials.access_key,
secretAccessKey: credentials.secret_key,
region: region
})
const s3 = new AWS.S3({
params: {
Bucket: credentials.bucket_name,
},
// endpoint,
signatureVersion: 'v4',
region: region,
apiVersion: '2006-03-01'
})
let filename = `uploadFile/${fileName}`;
if (currentChunkIndex == 1) {
uploadId = await getUploadId(filename, s3, credentials.bucket_name)
console.log("currentChunkIndex", " == ", currentChunkIndex, { uploadId })
}
if (currentChunkIndex < totalChunk) {
let etag = await uploadParts(filename, credentials.bucket_name, dataBuffer, currentChunkIndex, uploadId, s3)
return reply({
status: true,
uploadId,
etag,
uploadStatus: "uploading",
message: "uploading"
})
} else if (currentChunkIndex == totalChunk) {
let finalChunk = await uploadParts(filename, credentials.bucket_name, dataBuffer, currentChunkIndex, uploadId, s3)
let etag = { ETag: finalChunk.ETag, PartNumber: currentChunkIndex };
multiParts.push(etag)
let location = await completeFileMultiPartUpload(filename, credentials.bucket_name, uploadId, multiParts, s3, credentials.cdn_suffix);
location = location.replace("%2F","/")
console.log({ location })
if (location) {
return reply({
status: true,
url: location,
uploadStatus: "complete",
message: "upload completed"
})
}
}
} catch (error) {
logger.error(error)
return reply({
success: false,
message: error.message
});
}
};
These are the logs
So I have a couple of questions. 1. When we upload the 10 GB file, does it consume Node's memory? If "Yes", then what is the solution?
Thanks
2 Answers
Node’s Heap Memory
Without streaming chunks of the upload stream to storage on the server as the chunks arrive, you will only be able to upload a file that fits into Node's heap memory. The "upload stream" here is the request object in Node and/or Express, which is a readable stream.
Links on how to increase heap memory are on the web, but I couldn't find clear information on the nodejs.org site. You would also need more than 10 GB of free memory available on the server.
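As an illustration of that idea, here is a minimal sketch in plain Node that pipes the request stream straight to disk; the /upload route and the output filename are placeholders:

const http = require("http");
const fs = require("fs");
const path = require("path");

http.createServer((req, res) => {
  if (req.method === "POST" && req.url === "/upload") {
    // The request is a readable stream; piping it to disk keeps memory use
    // flat no matter how large the upload is.
    const target = path.join(__dirname, `upload-${Date.now()}.bin`);
    const out = fs.createWriteStream(target);
    req.pipe(out);

    out.on("finish", () => {
      res.writeHead(200, { "Content-Type": "application/json" });
      res.end(JSON.stringify({ saved: target }));
    });
    out.on("error", (err) => {
      res.writeHead(500);
      res.end(err.message);
    });
  } else {
    res.writeHead(404);
    res.end();
  }
}).listen(3000);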
FormData upload from the front end
The most common workaround that I found when researching the topic was to upload the file as part of a FormData object, which will still need to stream the file content to disk on the server when uploading large files; a minimal sketch of that server side follows.
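For illustration, a minimal sketch of that server side using Express with busboy (one streaming multipart parser among several; formidable or multer's disk storage behave similarly). The route, file paths, and the busboy 1.x API are assumptions of this sketch:

const express = require("express");
const busboy = require("busboy"); // assuming busboy 1.x
const fs = require("fs");
const path = require("path");

const app = express();

app.post("/upload", (req, res) => {
  const bb = busboy({ headers: req.headers });

  bb.on("file", (fieldName, fileStream, info) => {
    // Write the incoming file part to disk as it arrives instead of buffering it.
    const target = path.join(__dirname, `upload-${Date.now()}-${info.filename}`);
    fileStream.pipe(fs.createWriteStream(target));
  });

  bb.on("close", () => res.json({ status: "stored" }));
  bb.on("error", (err) => res.status(500).json({ message: err.message }));

  // Pipe the request into the parser so no large buffer is held in heap memory.
  req.pipe(bb);
});

app.listen(3000);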
Sending file data as the request body
A less common solution is to upload the file as the request body using fetch (or the XMLHttpRequest object). The uploaded data will still need to be streamed to local storage on the server. This method did not work for Node versions prior to 14.1, which would run out of heap space despite streaming the request stream to disk on the fly (updating Node fixed the problem).
Uploading the file directly as binary data in the body also requires a fairly complex init object supplied to fetch on the front end, including headers: Content-Disposition for the file name, Content-Type for the file type, Last-Modified for the date modified, and, in a local implementation of mine, a Payload-Length in octets to announce the file size before the request body is sent, for error control and security purposes. A sketch of such a call appears at the end of this answer.
Note this answer is directed to the questions numbered 1 and 2 in the post about Node – without reference to AWS or React, with which I claim no experience. It is also possible the posted error is occurring before running out of memory.
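For reference, the kind of fetch call described above might look roughly like this; the header names follow the description in this answer (Payload-Length is that local implementation's custom header, not a standard one), and the /upload URL is a placeholder:

// Send the raw file as the request body; the browser streams the File/Blob
// itself rather than JavaScript reading it into memory first.
async function uploadFile(file) {
  const response = await fetch("/upload", {
    method: "POST",
    headers: {
      "Content-Disposition": `attachment; filename="${encodeURIComponent(file.name)}"`,
      "Content-Type": file.type || "application/octet-stream",
      "Last-Modified": new Date(file.lastModified).toUTCString(),
      // Custom header: file size in octets, for error control on the server.
      "Payload-Length": String(file.size),
    },
    body: file,
  });
  if (!response.ok) throw new Error(`Upload failed with status ${response.status}`);
  return response;
}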
AWS Lambda does not support invocations with a payload larger than 6 MB.
You can check this and other limits on this AWS Lambda documentation page under "Function configuration, deployment and execution". Also note that AWS S3 has its own limits for multipart upload, as you can see on this AWS S3 documentation page.
From your frontend code it seems like your file upload chunks are set to 25 MB each, which far surpasses this limit.
To answer your first question: yes, the uploaded file data is allocated in the function's memory when you upload it to S3 through Lambda – although that does not seem to be the problem here.
A possible solution would be setting your chunk size close to 5 MB (~1048576 * 5), but not exactly 6 MB, as the rest of your HTTP request data also counts toward the size limit.
And finally, from a best-practices perspective, AWS Lambda isn't well suited for file uploads, especially large ones. Uploading your files directly to AWS S3 is supported and encouraged by Amazon through S3 pre-signed URLs. Your Lambda function would only be responsible for generating the pre-signed URLs for each chunk and returning them to your frontend, which would also inherently reduce its computing costs.
You can check this amazing blog post from AWS itself describing how to achieve that.
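As a rough sketch of that approach (assuming the AWS SDK for JavaScript v2, an already-created multipart upload, and placeholder bucket and expiry values), the Lambda would only sign an UploadPart URL per chunk and the frontend would PUT each chunk directly to S3:

const AWS = require("aws-sdk");

const s3 = new AWS.S3({ signatureVersion: "v4" });

// Example Lambda handler that returns a pre-signed UploadPart URL for one chunk.
exports.handler = async (event) => {
  const { key, uploadId, partNumber } = JSON.parse(event.body);

  const url = await s3.getSignedUrlPromise("uploadPart", {
    Bucket: "my-upload-bucket", // placeholder bucket name
    Key: key,
    UploadId: uploadId,
    PartNumber: partNumber,
    Expires: 15 * 60, // URL validity in seconds
  });

  // The frontend PUTs the chunk body to this URL; S3 returns the part's ETag
  // in the response headers, collected later for completeMultipartUpload.
  return { statusCode: 200, body: JSON.stringify({ url }) };
};

The bucket would also need a CORS rule allowing PUT from your frontend's origin, and the createMultipartUpload and completeMultipartUpload calls would still be issued server-side.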