- Considers case of simple regret, where exploration and exploitation are separated
- In this setting, the agent is allowed *n* pulls of arms and at the end must recommend an arm. The agent is not necessarily aware of the value of *n*.
- Costs during exploration are measured not in terms of regret, but in terms of some other resource, such as computation time
- May also require anytime performance
- The smaller the cumulative regret, the larger the simple regret
- Related to exploration/exploitation tradeoff
- Simple regret can be upper bounded in terms of cumulative regret
- For finite-armed bandits, best simple regret is achieved by sampling each arm a linear number of times, whereas in cumulative regret suboptimal arms should not be sampled more than a logarithmic number of times

- The statement that each arm should get a linear number of pulls for best simple regret is true only in the case of large *n*. For smaller values other techniques will yield better results (UCB-type strategies are better)
- The rate of decrease of simple regret is 1/sqrt(*n*) in the distribution-free case; in the distribution-dependent case the decrease is exponential. I’m not sure what the difference is here though.
- In continuous spaces the regret bound seems to be O(*n*), which I don't understand

Advertisements
/**
 * Ad slot bootstrap for the Criteo container "crt-231333595" (zone 388248).
 *
 * If `window.Criteo` is already present the slot is rendered immediately.
 * Otherwise the render callback is queued on `__ATA.criteo.cmd`, and the
 * Criteo publisher-tag script is injected once (tracked through
 * `__ATA.criteo.script`); its `onload` handler drains the queue.
 *
 * Fixes relative to the minified original:
 *  - the "not adblocked" callback set the misspelled property "visbility",
 *    which `setProperty` ignores; it now sets "visibility" like the
 *    no-Criteo fallback path does;
 *  - a missing container element no longer throws (a throw inside the shared
 *    `onload` loop would stop the remaining queued callbacks from running).
 */
(function () {
  var CONTAINER_ID = "crt-231333595";

  var render = function () {
    var container = document.getElementById(CONTAINER_ID);
    // Nothing to show or hide if the slot is not in the DOM.
    if (!container) {
      return;
    }

    // Fully hides the slot: removed from layout and not painted.
    var hide = function () {
      container.style.setProperty("display", "none", "important");
      container.style.setProperty("visibility", "hidden", "important");
    };

    // Criteo library unavailable: keep the slot hidden.
    if (!window.Criteo) {
      hide();
      return;
    }

    container.parentNode.style.setProperty("display", "inline-block", "important");
    container.style.setProperty("display", "block", "important");
    window.Criteo.DisplayAcceptableAdIfAdblocked({
      zoneid: 388248,
      containerid: CONTAINER_ID,
      collapseContainerIfNotAdblocked: true,
      // NOTE(review): presumably invoked by Criteo when no ad blocker is
      // detected (going by the option name) - confirm against Criteo docs.
      callifnotadblocked: hide
    });
  };

  if (window.Criteo) {
    render();
    return;
  }

  // Inject the publisher tag only once; other slots reuse the same script.
  if (!__ATA.criteo.script) {
    var loader = document.createElement("script");
    loader.src = "//static.criteo.net/js/ld/publishertag.js";
    loader.onload = function () {
      // Run every callback queued while the script was loading.
      for (var i = 0; i < __ATA.criteo.cmd.length; i++) {
        var queued = __ATA.criteo.cmd[i];
        if (typeof queued === "function") {
          queued();
        }
      }
    };
    (document.head || document.getElementsByTagName("head")[0]).appendChild(loader);
    __ATA.criteo.script = loader;
  }
  __ATA.criteo.cmd.push(render);
})();
/**
 * Ad slot bootstrap for the Criteo container "crt-2040575046" (zone 837497).
 *
 * If `window.Criteo` is already present the slot is rendered immediately.
 * Otherwise the render callback is queued on `__ATA.criteo.cmd`, and the
 * Criteo publisher-tag script is injected once (tracked through
 * `__ATA.criteo.script`); its `onload` handler drains the queue.
 *
 * Fixes relative to the minified original:
 *  - the "not adblocked" callback set the misspelled property "visbility",
 *    which `setProperty` ignores; it now sets "visibility" like the
 *    no-Criteo fallback path does;
 *  - a missing container element no longer throws (a throw inside the shared
 *    `onload` loop would stop the remaining queued callbacks from running).
 */
(function () {
  var CONTAINER_ID = "crt-2040575046";

  var render = function () {
    var container = document.getElementById(CONTAINER_ID);
    // Nothing to show or hide if the slot is not in the DOM.
    if (!container) {
      return;
    }

    // Fully hides the slot: removed from layout and not painted.
    var hide = function () {
      container.style.setProperty("display", "none", "important");
      container.style.setProperty("visibility", "hidden", "important");
    };

    // Criteo library unavailable: keep the slot hidden.
    if (!window.Criteo) {
      hide();
      return;
    }

    container.parentNode.style.setProperty("display", "inline-block", "important");
    container.style.setProperty("display", "block", "important");
    window.Criteo.DisplayAcceptableAdIfAdblocked({
      zoneid: 837497,
      containerid: CONTAINER_ID,
      collapseContainerIfNotAdblocked: true,
      // NOTE(review): presumably invoked by Criteo when no ad blocker is
      // detected (going by the option name) - confirm against Criteo docs.
      callifnotadblocked: hide
    });
  };

  if (window.Criteo) {
    render();
    return;
  }

  // Inject the publisher tag only once; other slots reuse the same script.
  if (!__ATA.criteo.script) {
    var loader = document.createElement("script");
    loader.src = "//static.criteo.net/js/ld/publishertag.js";
    loader.onload = function () {
      // Run every callback queued while the script was loading.
      for (var i = 0; i < __ATA.criteo.cmd.length; i++) {
        var queued = __ATA.criteo.cmd[i];
        if (typeof queued === "function") {
          queued();
        }
      }
    };
    (document.head || document.getElementsByTagName("head")[0]).appendChild(loader);
    __ATA.criteo.script = loader;
  }
  __ATA.criteo.cmd.push(render);
})();