skip to Main Content

When splitting a big file into small 25 MB chunks, the chunks that get written are actually much larger than 25 MB. Can someone please help?

// Script as posted in the question: read every file in ../in/ as a stream and
// try to write it out to ../out/ in parts of roughly 25 MB each.
// NOTE(review): the code is left exactly as posted; the comments below only
// point out why it does not behave the way the author expects.
const fs = require('fs')
const files = fs.readdirSync('../in/')
// map() is used purely for its side effects; the array it returns is discarded.
files.map(fileName => {
  const readable = fs.createReadStream('../in/'+fileName)
  let size = 0

  // Declared without an initial value, so it starts out as undefined.
  let temp_chunk;
  let count = 0 

  readable.on("data", (chunk) => {
    // `+=` turns this into string concatenation: the first call produces
    // "undefined" followed by the chunk converted to text, and every later
    // call appends more text. The accumulated value is therefore a string,
    // not the raw bytes that were read.
    temp_chunk += chunk
    // Builds a Blob from the chunk only to read its byte size.
    const byteSize = new Blob([chunk]).size;

    // `size` counts the bytes read from the stream, not the size of the
    // string held in temp_chunk.
    size += byteSize
    let amount = size / 1000000
    amount = amount.toString()
    // From here on `amount` is a string such as "12.34mb".
    amount = amount.substring(0, 5) + 'mb'

    console.log(amount)
    // `amount` is a string ending in "mb"; comparing it with a number converts
    // it to NaN, so as written this condition evaluates to false.
    if (amount > 24.5) {
      console.log(fileName+' '+count+' downloaded')
      // temp_chunk is a string here, so it is encoded again when written; the
      // number of bytes on disk can differ from `size`.
      fs.writeFileSync('../out/'+fileName+count, temp_chunk)
      temp_chunk = ''
      size = 0
      count++
    }
  })
  // No "end" listener is registered, so whatever is still buffered when the
  // stream finishes is never written.
})
  

I tried reading the size from temp_chunk instead of from each chunk; this worked, but it made the download significantly slower.

2

Answers


  1. Use a Buffer to concatenate chunks. Directly concatenating binary data with += on a string (like temp_chunk += chunk) is not appropriate for binary data and can lead to unexpected results.

    Solution

    const fs = require('fs');

    // A part is flushed once more than this many bytes are buffered
    // (24.5 MB, using decimal megabytes).
    const PART_THRESHOLD_BYTES = 24.5 * 1000000;

    // Split every file in ../in/ into numbered parts under ../out/.
    const files = fs.readdirSync('../in/');
    files.forEach(fileName => {
      const readable = fs.createReadStream('../in/' + fileName);
      let size = 0; // Bytes currently held in pendingChunks
      // Chunks are collected here and joined once per part. Concatenating on
      // every "data" event would re-copy the whole accumulated buffer each time.
      let pendingChunks = [];
      let count = 0; // Index of the next part to write

      // Write everything buffered so far as part number `count`, then reset.
      const writePart = (label) => {
        console.log(fileName + ' ' + count + ' ' + label);
        fs.writeFileSync(
          '../out/' + fileName + count,
          Buffer.concat(pendingChunks, size),
        );
        pendingChunks = [];
        size = 0;
        count++;
      };

      readable.on("data", (chunk) => {
        pendingChunks.push(chunk);
        size += chunk.length; // chunk is a Buffer, so length is in bytes

        console.log(size / 1000000 + 'mb');
        if (size > PART_THRESHOLD_BYTES) {
          writePart('downloaded');
        }
      });

      readable.on("end", () => {
        if (size > 0) {
          // Write the remaining bytes that never reached the threshold
          writePart('downloaded (last chunk)');
        }
      });

      // Without an "error" listener a failed read would surface as an
      // uncaught exception and abort the other files that are still in progress.
      readable.on("error", (err) => {
        console.error('failed to read ' + fileName + ': ' + err.message);
        process.exitCode = 1;
      });
    });
    
    Login or Signup to reply.
  2. You can use WHATWG standard streams (which have been part of Node since v17) to read and buffer bytes before emitting. Using this pattern will also increase portability of your code to other environments (e.g. browsers, Deno, Bun, etc.)

    A TransformStream (Node, MDN) which buffers up to a parameterized byte length before emitting might look like this:

    buffered_byte_stream.mjs:

    import { TransformStream } from "node:stream/web";
    
    /**
     * A TransformStream that re-chunks a byte stream into pieces of exactly
     * `byteLength` bytes. Only the final piece, emitted when the input ends,
     * may be shorter.
     *
     * Input chunks may be Uint8Array (including Node Buffers), ArrayBuffer, or
     * any iterable of byte values. Output chunks are always freshly allocated
     * Uint8Array instances.
     */
    export class BufferedByteStream extends TransformStream {
      /** Input chunks (or the unread tail of one) waiting to be emitted. */
      #pending = [];
      /** Total number of bytes held in #pending. */
      #pendingBytes = 0;

      /**
       * @param {number} byteLength - Size in bytes of every emitted chunk
       *   except possibly the last one. Must be a positive integer.
       * @throws {RangeError} If byteLength is not a positive integer.
       */
      constructor(byteLength) {
        // Validate up front: a zero or non-numeric length could never drain
        // the buffer in the loops below.
        if (!Number.isSafeInteger(byteLength) || byteLength <= 0) {
          throw new RangeError(
            `byteLength must be a positive integer, got ${String(byteLength)}`,
          );
        }
        super({
          transform: (chunk, controller) => {
            let bytes;
            if (chunk instanceof Uint8Array) {
              bytes = chunk;
            } else if (chunk instanceof ArrayBuffer) {
              bytes = new Uint8Array(chunk);
            } else {
              bytes = Uint8Array.from(chunk);
            }
            if (bytes.byteLength === 0) return;

            this.#pending.push(bytes);
            this.#pendingBytes += bytes.byteLength;
            // Loop, not `if`: one large input chunk can contain several
            // complete output chunks.
            while (this.#pendingBytes >= byteLength) {
              controller.enqueue(this.#take(byteLength));
            }
          },
          flush: (controller) => {
            while (this.#pendingBytes >= byteLength) {
              controller.enqueue(this.#take(byteLength));
            }
            // Whatever is left is shorter than byteLength: emit it as the tail.
            if (this.#pendingBytes > 0) {
              controller.enqueue(this.#take(this.#pendingBytes));
            }
          },
        });
      }

      /**
       * Remove the first `length` buffered bytes and return them as a new
       * Uint8Array. The caller guarantees `length <= #pendingBytes`.
       */
      #take(length) {
        const out = new Uint8Array(length);
        let offset = 0;
        while (offset < length) {
          const head = this.#pending[0];
          const needed = length - offset;
          if (head.byteLength <= needed) {
            out.set(head, offset);
            offset += head.byteLength;
            this.#pending.shift();
          } else {
            // Only part of the head chunk is needed; keep the rest as a view
            // so no bytes are copied until they are actually emitted.
            out.set(head.subarray(0, needed), offset);
            this.#pending[0] = head.subarray(needed);
            offset = length;
          }
        }
        this.#pendingBytes -= length;
        return out;
      }
    }
    

    And then using it in your main application code might look like this:

    main.mjs:

    import * as fs from "node:fs/promises";
    import * as path from "node:path";
    import { WritableStream } from "node:stream/web";
    
    import { BufferedByteStream } from "./buffered_byte_stream.mjs";
    
    /**
     * Build a WritableStream that stores every chunk it receives in its own
     * file next to `filePath`, named `<base>.partNNN` where NNN is a
     * zero-padded counter starting at 000.
     *
     * @param {string} filePath - Path whose directory and base name are used
     *   for the part files.
     * @returns {WritableStream} Sink that writes one file per incoming chunk.
     */
    function createWritableDestination(filePath) {
      let nextPart = 0;

      // Sink callback: name the part, log it, then persist the bytes.
      const write = async (bytes) => {
        const location = path.parse(filePath);
        const suffix = String(nextPart).padStart(3, "0");
        nextPart += 1;
        const fileName = `${location.base}.part${suffix}`;
        console.log(`downloaded: ${fileName}`);
        const target = path.join(location.dir, fileName);
        await fs.writeFile(target, bytes);
      };

      return new WritableStream({ write });
    }
    
    const inputDir = "../in";
    const outputDir = "../out";
    const byteLengthLimit = 25e6; // 25MB

    // Handle the regular files of inputDir one after another: stream each one
    // through BufferedByteStream and hand the resulting chunks to a
    // destination created for the same name under outputDir.
    const dirEntries = await fs.readdir(inputDir, { withFileTypes: true });
    for (const { name } of dirEntries.filter((dirEntry) => dirEntry.isFile())) {
      const sourceHandle = await fs.open(path.join(inputDir, name));
      try {
        // ReadableStream<Uint8Array>
        const source = sourceHandle.readableWebStream({ type: "bytes" });
        const rechunked = source.pipeThrough(
          new BufferedByteStream(byteLengthLimit),
        );
        await rechunked.pipeTo(
          createWritableDestination(path.join(outputDir, name)),
        );
      } finally {
        // Close the handle whether or not the pipeline succeeded.
        await sourceHandle.close();
      }
    }
    

    Code in TypeScript Playground

    Login or Signup to reply.
Please signup or login to give your own answer.
Back To Top
Search