javascript - Limit Q promise concurrency in Node js - Stack Overflow


Is there any way to limit the number of concurrent Q promises executed at once in Node.js?

I am building a web scraper which must request and parse 3000+ pages. Without throttling, some of the requests I make aren't responded to in time, so the connection resets and the needed response (the HTML) becomes unavailable.

To counteract this, I found that limiting the number of concurrent requests makes the problem go away.


I have tried the following methods but to no avail:

  • Concurrency limit in Q promises - node
  • How can I limit Q promise concurrency?
  • https://gist.github.com/gaearon/7930162
  • https://github.com/ForbesLindesay/throat

I need to request an array of URLs, making only one request at a time, and when all URLs in the array have completed, return the results in an array.

function processWebsite() {
  // computed by this stage
  urls = [u1,u2,u3,u4,u5,u6,u7,u8,u9];

  var promises = throttle(urls, 1, myfunction);

  // myfunction returns a Q promise and takes a considerable
  // amount of time to resolve (approximately 2-5 minutes)

  Q.all(promises).then(function(results){
      // work with the results of the promises array
  });
}
asked Nov 18, 2014 at 11:04 by user3438286

4 Answers

I'd do this: iterate over each URL, building a chain of promises in which each runs when the previous one finishes, and which finally resolves with an array of the request results.

return urls.reduce(function(acc, url){
    return acc.then(function(results){
        return myfunction(url).then(function(requestResult){
            return results.concat(requestResult);
        });
    });
}, Q.resolve([]));

You could turn that into a helper too:

var results = map(urls, myfunction);

function map(items, fn){
    return items.reduce(function(acc, item){
        return acc.then(function(results){
            return fn(item).then(function(result){
                return results.concat(result);
            });
        });
    }, Q.resolve([]));
}

Note that the Bluebird promise library has a helper that simplifies this kind of thing:

return Bluebird.map(urls, myfunction, {concurrency: 1});
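
For completeness, a fuller sketch of the Bluebird approach, assuming myfunction returns a promise for each URL (Bluebird preserves the input order in its results):

var Bluebird = require('bluebird');

Bluebird.map(urls, myfunction, { concurrency: 1 })
    .then(function (results) {
        // results are in the same order as urls
        console.log(results);
    });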

Here is my stab at making a throttled map function for Q.

function qMap(items, worker, concurrent) {
    var result = Q.defer();
    var work = [];        // promises, indexed the same as items
    var working = 0;      // number of currently running workers
    var done = 0;         // number of settled workers

    concurrent = parseInt(concurrent, 10) || 1;

    // find the first item that has not been started yet
    function getNextIndex() {
        var i;
        for (i = 0; i < items.length; i++) {
            if (typeof work[i] === "undefined") return i;
        }
    }
    // called whenever a worker settles: report progress, then
    // start the next worker, or resolve if everything is done
    function doneWorking() {
        working--;
        done++;
        result.notify( +((100 * done / items.length).toFixed(1)) );
        if (!startWorking() && done === items.length) {
            result.resolve(work);
        }
    }
    // start a worker for the next unstarted item, unless the
    // concurrency limit is reached; returns true if one was started
    function startWorking() {
        var index = getNextIndex();
        if (typeof index !== "undefined" && working < concurrent) {
            working++;
            work[index] = worker(items[index]).finally(doneWorking);
            return true;
        }
    }
    // kick off the initial batch of workers
    while (startWorking());
    return result.promise;
}

It accepts

  • an array of items to work on (URLs, in your case),
  • a worker (which must be a function that accepts an item and returns a promise)
  • and the maximum number of items to work on concurrently at any given time.

It returns

  • a promise that
  • resolves to an array of settled promises when all workers have finished.

It never rejects; you must inspect the individual promises to determine the overall state of the operation.

In your case you would use it like this, for example with 15 concurrent requests:

// myfunction returns a Q promise and takes a considerable 
// amount of time to resolve (approximately 2-5 minutes)

qMap(urls, myfunction, 15)
.progress(function (percentDone) {
    console.log("progress: " + percentDone);
})
.done(function (urlPromises) {
    console.log("all done: " + urlPromises);
});
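
Since qMap resolves with the settled promises themselves, one way to separate successes from failures is Q's inspect(). A minimal sketch, using the same qMap, urls and myfunction as above:

qMap(urls, myfunction, 15).done(function (urlPromises) {
    urlPromises.forEach(function (promise, i) {
        // inspect() returns a snapshot of a settled Q promise:
        // { state: "fulfilled", value: ... } or { state: "rejected", reason: ... }
        var snapshot = promise.inspect();
        if (snapshot.state === "fulfilled") {
            console.log(urls[i] + " succeeded: " + snapshot.value);
        } else {
            console.log(urls[i] + " failed: " + snapshot.reason);
        }
    });
});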

You can request a new URL in a then() block:

myFunction(urls[0]).then(function(result) {
  myFunction(urls[1]).then(function(result) {
    myFunction(urls[2]).then(function(result) {
      ...
    });
  });
});

Of course, you wouldn't hard-code the nesting like this; it just illustrates the dynamic behaviour. I'd maintain a queue and dequeue a single URL once a promise is resolved, then make another request, and perhaps keep a hash object relating URLs to results.
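
A minimal sketch of that queue idea, assuming myFunction(url) returns a Q promise (the helper name processQueue is illustrative):

function processQueue(urls) {
    var queue = urls.slice();   // copy, so the caller's array isn't mutated
    var results = {};           // hash relating urls to results

    function next() {
        if (queue.length === 0) return Q(results);
        var url = queue.shift();
        return myFunction(url).then(function (result) {
            results[url] = result;
            return next();      // dequeue the next url once this one resolves
        });
    }

    return next();
}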

A second take:

var urls = ...;
var limit = ...;

// remove and return up to `limit` urls from the front of the queue
var dequeue = function() {
  return urls.splice(0, limit);
};

// `process` stands for a function that requests one url and returns a promise
var myFunction = function(dequeue) {
  var batch = dequeue();

  return Q.all(batch.map(process));
};

myFunction(dequeue).then(function(result) {
  myFunction(dequeue).then(function(result) {
    myFunction(dequeue).then(function(result) {
      ...
    });
  });
});

Note that this processes the urls in batches: each group of limit urls must complete before the next group starts.

No external libraries. Just plain JS.

If you really need just one request at a time, then it's easy using async/await:

async function processWebsite(urls) {
  const responsesAndErrors = new Array(urls.length);
  for (let i = 0; i < urls.length; i++) {
    try {
      responsesAndErrors[i] = await processPage(urls[i]);
    } catch(error) {
      responsesAndErrors[i] = error;
    }
  }
  return responsesAndErrors;
}

async function processPage(url) {
  console.log(`${url} started`);
  const delay = Math.floor(Math.random() * 1500);
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      if (delay <= 1000) {
        console.log(`${url} finished successfully`);
        resolve(`${url} success`);
      } else {
        console.log(`${url} finished with error`);
        reject(`${url} error`);
      }
    }, delay);
  });
}

const urls = new Array(10).fill('url').map((url, index) => `${url}_${index + 1}`);

processWebsite(urls).then(responses => console.log('All', responses));

If you need more than one request at a time, then it can be solved using recursion.

The idea is that we immediately fire off the maximum allowed number of requests, and each of these requests recursively initiates a new request on its completion.

In this example I collect successful responses together with errors and process all pages, but it's possible to slightly modify the algorithm if you want to terminate batch processing on the first failure (a sketch of that variant follows the code below).

async function processWebsite(urls, limit) {
  limit = Math.min(urls.length, limit);

  return new Promise((resolve, reject) => {
    const responsesOrErrors = new Array(urls.length);
    let startedCount = 0;
    let finishedCount = 0;
    let hasErrors = false;

    function recursiveProcess() {
      let index = startedCount++;

      processPage(urls[index])
        .then(res => {
          responsesOrErrors[index] = res;
        })
        .catch(error => {
          responsesOrErrors[index] = error;
          hasErrors = true;
        })
        .finally(() => {
          finishedCount++;
          if (finishedCount === urls.length) {
            hasErrors ? reject(responsesOrErrors) : resolve(responsesOrErrors);
          } else if (startedCount < urls.length) {
            recursiveProcess();
          }
        });
    }

    for (let i = 0; i < limit; i++) {
      recursiveProcess();
    }
  });
}

async function processPage(url) {
  console.log(`${url} started`);
  const delay = Math.floor(Math.random() * 1500);
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      if (delay <= 1000) {
        console.log(`${url} finished successfully`);
        resolve(`${url} success`);
      } else {
        console.log(`${url} finished with error`);
        reject(`${url} error`);
      }
    }, delay);
  });
}

const urls = new Array(10).fill('url').map((url, index) => `${url}_${index + 1}`);

processWebsite(urls, 3)
  .then(responses => console.log('All successful', responses))
  .catch(responsesWithErrors => console.log('Some failed', responsesWithErrors));
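
As mentioned above, the algorithm can be modified to terminate on the first failure. A minimal sketch of that fail-fast variant (an assumption on my part: we reject immediately on the first error and stop launching new requests, while already-started requests are simply left to finish):

async function processWebsiteFailFast(urls, limit) {
  limit = Math.min(urls.length, limit);

  return new Promise((resolve, reject) => {
    const responses = new Array(urls.length);
    let startedCount = 0;
    let finishedCount = 0;
    let failed = false;

    function recursiveProcess() {
      let index = startedCount++;

      processPage(urls[index])
        .then(res => {
          if (failed) return;      // another request already rejected
          responses[index] = res;
          finishedCount++;
          if (finishedCount === urls.length) {
            resolve(responses);    // every page succeeded
          } else if (startedCount < urls.length) {
            recursiveProcess();    // launch the next request
          }
        })
        .catch(error => {
          failed = true;           // stop launching new requests
          reject(error);           // fail fast with the first error
        });
    }

    for (let i = 0; i < limit; i++) {
      recursiveProcess();
    }
  });
}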
