I’m streaming a number of S3 objects, merging them into a zip archive, and uploading the resulting archive back to S3. This works well for smaller exports, but it gets stuck on larger ones. After reviewing the logs, I noticed that the connection consistently freezes at the 5-minute mark, so it looks like S3 is closing the connection after roughly 5 minutes of inactivity.
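For what it's worth, the stall looks like a plain idle-socket timeout rather than anything specific to my pipeline. A stripped-down sketch of how to observe it in isolation (Bucket and Key are placeholders, not my real values):

const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');

const watchIdleStream = async () => {
  const client = new S3Client({ region: process.env.AWS_REGION });
  // Open an object stream and deliberately never read from it,
  // then watch whether the other side closes it after ~5 minutes
  const { Body } = await client.send(
    new GetObjectCommand({ Bucket: 'some-bucket', Key: 'some-key' }),
  );
  const startedAt = Date.now();
  const elapsed = () => Math.round((Date.now() - startedAt) / 1000);
  Body.on('error', (err) => console.log(`stream error after ${elapsed()}s: ${err}`));
  Body.on('close', () => console.log(`stream closed after ${elapsed()}s`));
};

watchIdleStream().catch((err) => console.log(err));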
Here’s the code:
// Imports (assuming CommonJS); gets3Client, updateExport and invokeExportCreatedTask are
// project helpers — the two S3 helpers are sketched after the snippet.
const fs = require('fs');
const { Agent } = require('https');
const { PassThrough } = require('stream');
const archiver = require('archiver');
const async = require('async');
const { NodeHttpHandler } = require('@smithy/node-http-handler'); // '@aws-sdk/node-http-handler' on older SDK v3
const { Upload } = require('@aws-sdk/lib-storage');

const createZipAndUpload = async (
  filePath,
  fileName,
  zipFileName,
  exportIdentifier,
  blobs,
  matrixCSVs,
  exportRepository,
) => {
  return new Promise((resolve, reject) => {
    try {
      const requestHandler = new NodeHttpHandler({
        httpsAgent: new Agent({
          keepAlive: true,
          rejectUnauthorized: true,
        }),
        // tried all the possible timeout options
        requestTimeout: 3600000,
        timeout: 3600000,
        connectionTimeout: 3600000,
        socketTimeout: 3600000,
      });
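      // As far as I can tell from the handler's typings, only connectionTimeout,
      // requestTimeout and the older socketTimeout alias are recognized; timeout doesn't
      // look like a real option, but I've left everything in while debugging.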
      const s3Client = gets3Client(process.env.AWS_REGION, requestHandler);
      const exportBucketName = 'export';
      const streamPassThrough = new PassThrough();
      const archive = archiver('zip');
      // Handle archiver errors
      archive.on('error', (error) => {
        console.log(`error on archive: ${error}`);
        reject(
          new Error(
            `${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`,
          ),
        );
      });
      archive.pipe(streamPassThrough).on('error', (error) => {
        console.log(`error on streamPassThrough: ${error}`);
      });
      // Local files: the main export file plus the per-matrix CSVs already on disk
      archive.file(filePath, { name: fileName });
      matrixCSVs.forEach((matrixCSV) => {
        archive.file(`tmp/${matrixCSV.fileName}`, { name: matrixCSV.fileName });
      });
      const MAX_CONCURRENCY = 3;
      let s3ClientForBlobs = gets3Client(process.env.AWS_REGION);
      async.eachOfLimit(
        blobs,
        MAX_CONCURRENCY,
        async (blobItem, index) => {
          console.log(`Processing ${index}: ${blobItem.key}`);
          if ((index + 1) % 50 === 0) {
            console.log(`new s3 client`);
            // Swap in a fresh S3 client every 50 objects; S3 appeared to throttle
            // requests after around 50 objects on a single client.
            s3ClientForBlobs = gets3Client(process.env.AWS_REGION);
          }
          const stream = await blobItem.getS3ObjectWithStream(s3ClientForBlobs);
          if (stream) {
            stream.on('end', () => {});
            archive.append(stream, { name: blobItem.key });
          }
        },
        async (err) => {
          if (err) {
            reject(new Error(`error in getting streams ${err}`));
            return;
          }
          archive.finalize();
          // Create an Upload instance with the S3 client and parameters
          const upload = new Upload({
            client: s3Client,
            partSize: 100000000, // size of each part in bytes (~100 MB)
            params: {
              ACL: 'private',
              Body: streamPassThrough,
              Bucket: exportBucketName,
              ContentType: 'application/zip',
              Key: zipFileName,
              StorageClass: 'STANDARD_IA',
            },
          });
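          // (Upload also accepts queueSize and leavePartsOnError; both are still at their defaults here.)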
          console.log('Starting upload');
          // Log multipart upload progress
          upload.on('httpUploadProgress', (progress) => {
            if (progress) {
              console.log(`progress: ${progress.part}, ${progress.loaded}`);
            }
            return progress;
          });
          upload
            .done()
            .then(async () => {
              console.log('Upload completed.');
              await updateExport(
                exportIdentifier,
                zipFileName,
                exportRepository,
              );
              await invokeExportCreatedTask(exportIdentifier);
              streamPassThrough.end();
              fs.unlinkSync(filePath);
              resolve();
            })
            .catch((err) => {
              console.error('Upload error:', err);
              reject(err);
            });
        },
      );
      streamPassThrough.on('end', () => {
        console.log(`ending stream`);
      });
    } catch (error) {
      console.log(`Got error creating stream to S3: ${error.message}`);
      // throw error;
      reject(error);
    }
  });
};
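For reference, gets3Client and blobItem.getS3ObjectWithStream are small project helpers: the first builds an S3Client for the given region (optionally with a custom request handler), and the second issues a GetObject for the blob's key and returns the response body stream, or nothing on failure. Roughly like this (simplified; the real implementations live elsewhere, and the bucket name is a placeholder):

const { S3Client, GetObjectCommand } = require('@aws-sdk/client-s3');

const gets3Client = (region, requestHandler) =>
  new S3Client(requestHandler ? { region, requestHandler } : { region });

// In the real code this is a method on each blob item and `key` comes from the instance;
// it is called as blobItem.getS3ObjectWithStream(s3Client).
const getS3ObjectWithStream = async (s3Client, key) => {
  try {
    const { Body } = await s3Client.send(
      new GetObjectCommand({ Bucket: 'blob-bucket', Key: key }),
    );
    return Body;
  } catch (err) {
    console.log(`failed to stream ${key}: ${err}`);
    return undefined;
  }
};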
How can I improve this so that larger uploads don't stall at the 5-minute mark?