diff --git a/src/_locales/en_US/messages.json b/src/_locales/en_US/messages.json
index f10fe37701..59b9317474 100644
--- a/src/_locales/en_US/messages.json
+++ b/src/_locales/en_US/messages.json
@@ -173,6 +173,14 @@
         "message": "Learn in Private/Incognito windows",
         "description": "Checkbox label on the general settings page"
     },
+    "options_community_learning_setting": {
+        "message": "Enable community learning and share data about trackers",
+        "description": "Checkbox label on the general settings page"
+    },
+    "options_community_learning_warning": {
+        "message": "When you enable community learning, your browser will share some information it collects about trackers with EFF. Specifically, each time your instance of Privacy Badger observes a particular tracker on a website that it has not seen before, it will share the origin (top-level domain +1) of both the tracker and the website, as well as the type of tracking action that it observed. EFF will only use this information for generating community learning lists, and will never share personal information with third parties. For more details, see our privacy policy: https://link.to.come",
+        "description": "Explanatory warning for the community learning checkbox on the general settings page"
+    },
     "options_incognito_warning": {
         "message": "Enabling learning in Private/Incognito windows may leave traces of your private browsing history on your computer. By default, Privacy Badger will block trackers it already knows about in Private/Incognito windows, but it won't learn about new trackers. You might want to enable this option if a lot of your browsing happens in Private/Incognito windows.",
         "description": "Tooltip on the general settings page"
diff --git a/src/js/background.js b/src/js/background.js
index 6cdee4c602..7f7c86fd30 100644
--- a/src/js/background.js
+++ b/src/js/background.js
@@ -715,6 +715,7 @@ Badger.prototype = {
     migrationLevel: 0,
     seenComic: false,
     sendDNTSignal: true,
+    shareLearning: false,
     showCounter: true,
     showIntroPage: true,
     showNonTrackingDomains: false,
@@ -930,13 +931,36 @@ Badger.prototype = {
    * and if tab_id is for an incognito window,
    * is learning in incognito windows enabled?
    */
-  isLearningEnabled(tab_id) {
+  isLocalLearningEnabled(tab_id) {
     return (
       this.getSettings().getItem("learnLocally") &&
       incognito.learningEnabled(tab_id)
     );
   },
 
+  /**
+   * Is community learning generally enabled,
+   * and is tab_id in a regular (not incognito) window?
+   */
+  isCommunityLearningEnabled(tab_id) {
+    return (
+      this.getSettings().getItem("shareLearning") &&
+      !incognito.isIncognito(tab_id)
+    );
+  },
+
+  /**
+   * Is any kind of learning (local or community) enabled on this tab?
+   *
+   * TODO: should community learning happen in incognito tabs?
+   */
+  isLearningEnabled(tab_id) {
+    return (
+      this.isLocalLearningEnabled(tab_id) ||
+      this.isCommunityLearningEnabled(tab_id)
+    );
+  },
+
   /**
    * Check if widget replacement functionality is enabled.
    */
@@ -991,7 +1015,7 @@ Badger.prototype = {
   },
 
   /**
-   * Checks if local storage ( in dict) has any high-entropy keys
+   * Checks if local storage (in dict) has any high-entropy keys
    *
    * @param {Object} lsItems Local storage dict
    * @returns {boolean} true if it seems there are supercookies
diff --git a/src/js/constants.js b/src/js/constants.js
index 5a77d0215c..8eb8fba661 100644
--- a/src/js/constants.js
+++ b/src/js/constants.js
@@ -40,6 +40,17 @@ var exports = {
   TRACKING_THRESHOLD: 3,
   MAX_COOKIE_ENTROPY: 12,
 
+  // The max amount of time (in milliseconds) that PB will wait before sharing a
+  // tracking action with EFF for community learning
+  MAX_CL_WAIT_TIME: 5 * 60 * 1000, // five minutes
+
+  // The probability that any given tracking action will be logged to the
+  // community server, as a float from 0.0 to 1.0
+  CL_PROBABILITY: 1.0,
+
+  // size of the in-memory community learning cache
+  CL_CACHE_SIZE: 5000,
+
   DNT_POLICY_CHECK_INTERVAL: 1000, // one second
 };
 
@@ -50,5 +61,12 @@ exports.BLOCKED_ACTIONS = new Set([
   exports.USER_COOKIEBLOCK,
 ]);
 
+exports.TRACKER_TYPES = Object.freeze({
+  COOKIE: "cookie",
+  COOKIE_SHARE: "cookie_share",
+  SUPERCOOKIE: "supercookie",
+  FINGERPRINT: "fingerprint",
+});
+
 return exports;
 })();
diff --git a/src/js/heuristicblocking.js b/src/js/heuristicblocking.js
index 2e73d2d5da..46441001fd 100644
--- a/src/js/heuristicblocking.js
+++ b/src/js/heuristicblocking.js
@@ -37,6 +37,9 @@ function HeuristicBlocker(pbStorage) {
   // impossible to attribute to a tab.
this.tabOrigins = {};
   this.tabUrls = {};
+
+  // in-memory cache for community learning
+  this.previouslySharedTrackers = new Set();
 }
 
 HeuristicBlocker.prototype = {
@@ -104,8 +107,10 @@ HeuristicBlocker.prototype = {
    * @param {Boolean} check_for_cookie_share whether to check for cookie sharing
    */
   heuristicBlockingAccounting: function (details, check_for_cookie_share) {
+    let tab_id = details.tabId;
+
     // ignore requests that are outside a tabbed window
-    if (details.tabId < 0 || !badger.isLearningEnabled(details.tabId)) {
+    if (tab_id < 0 || !badger.isLearningEnabled(tab_id)) {
       return {};
     }
 
@@ -115,12 +120,12 @@ HeuristicBlocker.prototype = {
 
     // if this is a main window request, update tab data and quit
     if (details.type == "main_frame") {
-      self.tabOrigins[details.tabId] = request_origin;
-      self.tabUrls[details.tabId] = details.url;
+      self.tabOrigins[tab_id] = request_origin;
+      self.tabUrls[tab_id] = details.url;
       return {};
     }
 
-    let tab_origin = self.tabOrigins[details.tabId];
+    let tab_origin = self.tabOrigins[tab_id];
 
     // ignore first-party requests
     if (!tab_origin || !utils.isThirdPartyDomain(request_origin, tab_origin)) {
@@ -141,15 +146,16 @@ HeuristicBlocker.prototype = {
 
     // check if there are tracking cookies
     if (hasCookieTracking(details, request_origin)) {
-      self._recordPrevalence(request_host, request_origin, tab_origin);
+      self._recordPrevalence(request_host, request_origin, tab_origin, tab_id,
+        constants.TRACKER_TYPES.COOKIE);
       return {};
     }
 
     // check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters
     if (check_for_cookie_share && details.type == 'image' && details.frameId === 0 && details.url.indexOf('?') > -1) {
       // get all non-HttpOnly cookies for the top-level frame
       // and pass those to the cookie-share accounting function
-      let tab_url = self.tabUrls[details.tabId];
+      let tab_url = self.tabUrls[tab_id];
 
       let config = {
         url: tab_url
@@ -161,7 +167,7 @@ HeuristicBlocker.prototype = {
       chrome.cookies.getAll(config, function (cookies) {
         cookies = cookies.filter(cookie => !cookie.httpOnly);
         if (cookies.length >= 1) {
-          self.pixelCookieShareAccounting(tab_url, tab_origin, details.url, request_host, request_origin, cookies);
+          self.pixelCookieShareAccounting(tab_id, details.url, request_host, request_origin, cookies);
         }
       });
     }
@@ -179,8 +185,10 @@ HeuristicBlocker.prototype = {
    * @param cookies are the result of chrome.cookies.getAll()
    * @returns {*}
    */
-  pixelCookieShareAccounting: function (tab_url, tab_origin, request_url, request_host, request_origin, cookies) {
+  pixelCookieShareAccounting: function (tab_id, request_url, request_host, request_origin, cookies) {
     let params = (new URL(request_url)).searchParams,
+      tab_origin = this.tabOrigins[tab_id],
+      tab_url = this.tabUrls[tab_id],
       TRACKER_ENTROPY_THRESHOLD = 33,
       MIN_STR_LEN = 8;
 
@@ -251,7 +259,10 @@ HeuristicBlocker.prototype = {
             log("Found high-entropy cookie share from", tab_origin, "to", request_host,
               ":", entropy, "bits\n cookie:", cookie.name, '=', cookie.value,
               "\n arg:", key, "=", value, "\n substring:", s);
-            this._recordPrevalence(request_host, request_origin, tab_origin);
+            this._recordPrevalence(
+              request_host, request_origin, tab_origin, tab_id,
+              constants.TRACKER_TYPES.COOKIE_SHARE
+            );
             return;
           }
         }
@@ -265,8 +276,11 @@ HeuristicBlocker.prototype = {
    * @param {String} tracker_fqdn The fully qualified domain name of the tracker
    * @param {String} tracker_origin Base domain of the third party tracker
    * @param {String} page_origin Base domain of page where tracking occurred
+   * @param {Integer} tab_id the ID of the tab the user is in
+   * @param {String} tracker_type the kind of tracking action that was observed
    */
-  updateTrackerPrevalence: function (tracker_fqdn, tracker_origin, page_origin) {
+  updateTrackerPrevalence: function (tracker_fqdn, tracker_origin, page_origin,
+    tab_id, tracker_type) {
     // abort if we already made a decision for this fqdn
     let action = this.storage.getAction(tracker_fqdn);
     if (action != constants.NO_TRACKING && action != constants.ALLOW) {
@@ -276,7 +290,9 @@ HeuristicBlocker.prototype = {
     this._recordPrevalence(
       tracker_fqdn,
       tracker_origin,
-      page_origin
+      page_origin,
+      tab_id,
+      tracker_type
     );
   },
 
@@ -292,8 +308,10 @@ HeuristicBlocker.prototype = {
    * @param {String} tracker_fqdn The FQDN of the third party tracker
    * @param {String} tracker_origin Base domain of the third party tracker
    * @param {String} page_origin Base domain of page where tracking occurred
+   * @param {Integer} tab_id the ID of the tab the tracking was observed in
+   * @param {String} tracker_type the kind of tracking action that was observed
    */
-  _recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin) {
+  _recordPrevalence: function (tracker_fqdn, tracker_origin, page_origin, tab_id, tracker_type) {
     var snitchMap = this.storage.getStore('snitch_map');
     var firstParties = [];
     if (snitchMap.hasItem(tracker_origin)) {
@@ -310,23 +328,89 @@ HeuristicBlocker.prototype = {
       return; // We already know about the presence of this tracker on the given domain
     }
 
-    // record that we've seen this tracker on this domain (in snitch map)
-    firstParties.push(page_origin);
-    snitchMap.setItem(tracker_origin, firstParties);
+    // If community learning is enabled, queue up a request to the EFF server.
+    // Share base domains only (not full hostnames): the community learning
+    // notice tells users that only origins are shared.
+    if (badger.isCommunityLearningEnabled(tab_id)) {
+      this.shareTrackerInfo(page_origin, tracker_origin, tracker_type);
+    }
 
-    // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD
-    // (vs. NO_TRACKING for resources we haven't seen perform tracking yet).
-    // see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710
-    this.storage.setupHeuristicAction(tracker_fqdn, constants.ALLOW);
-    this.storage.setupHeuristicAction(tracker_origin, constants.ALLOW);
+    // If local learning is enabled, record that we've seen this tracker on this
+    // domain (in snitch map)
+    if (badger.isLocalLearningEnabled(tab_id)) {
+      firstParties.push(page_origin);
+      snitchMap.setItem(tracker_origin, firstParties);
+
+      // ALLOW indicates this is a tracker still below TRACKING_THRESHOLD
+      // (vs. NO_TRACKING for resources we haven't seen perform tracking yet).
+      // see https://github.com/EFForg/privacybadger/pull/1145#discussion_r96676710
+      this.storage.setupHeuristicAction(tracker_fqdn, constants.ALLOW);
+      this.storage.setupHeuristicAction(tracker_origin, constants.ALLOW);
+
+      // block the origin if it has been seen on multiple first party domains
+      if (firstParties.length >= constants.TRACKING_THRESHOLD) {
+        log('blocklisting origin', tracker_fqdn);
+        this.blocklistOrigin(tracker_origin, tracker_fqdn);
+      }
+    }
+  },
 
-    // Blocking based on outbound cookies
-    var httpRequestPrevalence = firstParties.length;
+  /**
+   * Share information about a tracker for community learning
+   *
+   * @param {String} page_host the site where the tracking was observed
+   * @param {String} tracker_host the tracker that was observed
+   * @param {String} tracker_type the kind of tracking action that was observed
+   */
+  shareTrackerInfo: function(page_host, tracker_host, tracker_type) {
+    // Share a random sample of trackers we observe
+    if (Math.random() < constants.CL_PROBABILITY) {
+      // check if we've shared this tracker recently
+      // note that this check comes after checking against the snitch map
+      let tr_str = page_host + '+' + tracker_host + '+' + tracker_type;
+      if (this.previouslySharedTrackers.has(tr_str)) {
+        return;
+      }
+
+      // add this entry to the cache
+      this.previouslySharedTrackers.add(tr_str);
+
+      // if the cache gets too big, cut it in half
+      if (this.previouslySharedTrackers.size > constants.CL_CACHE_SIZE) {
+        this.previouslySharedTrackers = new Set(
+          // An array created from the set will have all of its entries ordered
+          // by when they were added
+          Array.from(this.previouslySharedTrackers).slice(
+            // keep the most recent half of the cache entries
+            Math.floor(constants.CL_CACHE_SIZE / 2)
+          )
+        );
+      }
 
-    // block the origin if it has been seen on multiple first party domains
-    if (httpRequestPrevalence >= constants.TRACKING_THRESHOLD) {
-      log('blocklisting origin', tracker_fqdn);
-      this.blocklistOrigin(tracker_origin, tracker_fqdn);
+      // now make the request to the database server
+      // NOTE(review): the plain-HTTP localhost endpoint looks like a development
+      // placeholder; replace it with the real HTTPS endpoint before release
+      setTimeout(function() {
+        fetch("http://localhost:8080", {
+          method: "POST",
+          body: JSON.stringify({
+            tracker_data: {
+              page_host: page_host,
+              tracker_host: tracker_host,
+              tracker_type: tracker_type,
+            }
+          })
+        }).then(res => {
+          if (!res.ok) {
+            console.log("tracking action logging failed:", res);
+          }
+        }).catch(err => {
+          // fetch rejects on network failure; handle it so the rejection
+          // doesn't go unhandled
+          console.log("tracking action logging failed:", err);
+        });
+        // share info after a random delay, to reduce network load on browser
+      }, Math.floor(Math.random() * constants.MAX_CL_WAIT_TIME));
     }
   }
 };
diff --git a/src/js/incognito.js b/src/js/incognito.js
index 56d2d9332d..c1bfdc1957 100644
--- a/src/js/incognito.js
+++ b/src/js/incognito.js
@@ -25,23 +25,30 @@ function startListeners() {
   chrome.tabs.onRemoved.addListener(onRemovedListener);
 }
 
+function isIncognito(tab_id) {
+  // if we don't have incognito data for whatever reason,
+  // default to "true"
+  if (!tabs.hasOwnProperty(tab_id)) {
+    return true;
+  }
+  // else, report the incognito status recorded for this tab
+  return tabs[tab_id];
+}
+
 function learningEnabled(tab_id) {
   if (badger.getSettings().getItem("learnInIncognito")) {
     // treat all pages as if they're not incognito
     return true;
   }
-  // if we don't have incognito data for whatever reason,
-  // default to disabled
-  if (!tabs.hasOwnProperty(tab_id)) {
-    return false;
-  }
-  // else, do not learn in incognito tabs
-  return !tabs[tab_id];
+
+  // otherwise, return true if this tab is _not_ incognito
+  return !isIncognito(tab_id);
 }
 
 /************************************** exports */
 let exports = {
   learningEnabled,
+  isIncognito,
   startListeners,
 };
 return exports;
diff --git
a/src/js/options.js b/src/js/options.js
index 7ca70080ed..1b2119516f 100644
--- a/src/js/options.js
+++ b/src/js/options.js
@@ -190,6 +190,18 @@ function loadOptions() {
     });
   });
 
+  $('#community-learning-checkbox')
+    .prop("checked", OPTIONS_DATA.settings.shareLearning)
+    .on("click", (event) => {
+      const enabled = $(event.currentTarget).prop("checked");
+      chrome.runtime.sendMessage({
+        type: "updateSettings",
+        data: {
+          shareLearning: enabled
+        }
+      }, function () {});
+    });
+
   $('#show-nontracking-domains-checkbox')
     .prop("disabled", OPTIONS_DATA.settings.learnLocally ? false : "disabled")
     .prop("checked", (
diff --git a/src/js/storage.js b/src/js/storage.js
index 9074a41d87..8e19d43f3d 100644
--- a/src/js/storage.js
+++ b/src/js/storage.js
@@ -645,12 +645,36 @@ BadgerStorage.prototype = {
     } else if (self.name == "snitch_map") {
       for (let tracker_origin in mapData) {
         let firstPartyOrigins = mapData[tracker_origin];
+        let firstParties = [];
+        if (self.hasItem(tracker_origin)) {
+          firstParties = self.getItem(tracker_origin);
+        }
+
+        // this uses the same logic as updateTrackerPrevalence, but ignores
+        // checks for local learning and community learning
         for (let i = 0; i < firstPartyOrigins.length; i++) {
-          badger.heuristicBlocking.updateTrackerPrevalence(
-            tracker_origin,
-            tracker_origin,
-            firstPartyOrigins[i]
-          );
+          // abort if we already made a decision for this domain
+          let action = badger.storage.getAction(tracker_origin);
+          if (action != constants.NO_TRACKING && action != constants.ALLOW) {
+            break;
+          }
+
+          // skip first parties already recorded for this tracker, so that
+          // re-importing the same data can't inflate the prevalence count
+          if (firstParties.indexOf(firstPartyOrigins[i]) != -1) {
+            continue;
+          }
+
+          firstParties.push(firstPartyOrigins[i]);
+          self.setItem(tracker_origin, firstParties);
+          badger.storage.setupHeuristicAction(tracker_origin, constants.ALLOW);
+
+          // block the origin if it has been seen on multiple first party domains
+          if (firstParties.length >= constants.TRACKING_THRESHOLD) {
+            log('blocklisting origin', tracker_origin);
+            badger.heuristicBlocking.blocklistOrigin(tracker_origin,
+              tracker_origin);
+          }
         }
       }
     }
diff --git a/src/js/webrequest.js b/src/js/webrequest.js
index bb7469bf38..4307d25f26 100644
--- a/src/js/webrequest.js
+++ b/src/js/webrequest.js
@@ -448,7 +448,9 @@ function recordSupercookie(tab_id, frame_url) {
   badger.heuristicBlocking.updateTrackerPrevalence(
     frame_host,
     window.getBaseDomain(frame_host),
-    window.getBaseDomain(page_host)
+    window.getBaseDomain(page_host),
+    tab_id,
+    constants.TRACKER_TYPES.SUPERCOOKIE
   );
 }
 
@@ -515,7 +517,9 @@ function recordFingerprinting(tabId, msg) {
 
         // Mark this as a strike
         badger.heuristicBlocking.updateTrackerPrevalence(
-          script_host, script_origin, window.getBaseDomain(document_host));
+          script_host, script_origin, window.getBaseDomain(document_host),
+          tabId, constants.TRACKER_TYPES.FINGERPRINT
+        );
       }
     }
     // This is a canvas write
diff --git a/src/skin/options.html b/src/skin/options.html
index 7361139c74..f5f16af785 100644
--- a/src/skin/options.html
+++ b/src/skin/options.html
@@ -235,6 +235,17 @@

+
+ +