Add Tightest perceptron (#438)

ishii-norimi · web-flow · commit e13f6f6fe9f7 · 2022-11-24T23:41:13.000+09:00
diff --git a/README.md b/README.md
@@ -122,7 +122,7 @@ for (let i = 0; i < n; i++) {
 | task | model |
 | ---- | ----- |
 | clustering | (Soft / Kernel / Genetic / Weighted) k-means, k-means++, k-medois, k-medians, x-means, G-means, LBG, ISODATA, Fuzzy c-means, Possibilistic c-means, Agglomerative (complete linkage, single linkage, group average, Ward's, centroid, weighted average, median), DIANA, Monothetic, Mutual kNN, Mean shift, DBSCAN, OPTICS, HDBSCAN, DENCLUE, CLUES, PAM, CLARA, CLARANS, BIRCH, CURE, ROCK, C2P, PLSA, Latent dirichlet allocation, GMM, VBGMM, Affinity propagation, Spectral clustering, Mountain, SOM, GTM, (Growing) Neural gas, Growing cell structures, LVQ, ART, SVC, CAST, CHAMELEON, NMF, Autoencoder |
-| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, Online gradient descent, Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, BSGD, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter) Perceptron, PAUM, RBP, ADALINE, MLP, LMNN |
+| classification | (Fisher's) Linear discriminant, Quadratic discriminant, Mixture discriminant, Least squares, Ridge, (Complement / Negation / Universal-set / Selective) Naive Bayes (gaussian), AODE, (Fuzzy / Weighted) k-nearest neighbor, Radius neighbor, Nearest centroid, ENN, NNBCA, ADAMENN, DANN, IKNN, Decision tree, Random forest, Extra trees, GBDT, XGBoost, ALMA, (Aggressive) ROMMA, Online gradient descent, Passive aggressive, RLS, (Selective-sampling) Second order perceptron, AROW, NAROW, Confidence weighted, CELLIP, IELLIP, Normal herd, Stoptron, (Kernelized) Pegasos, MIRA, Forgetron, Projectron, Projectron++, Banditron, Ballseptron, BSGD, (Multinomial) Logistic regression, (Multinomial) Probit, SVM, Gaussian process, HMM, CRF, Bayesian Network, LVQ, (Average / Multiclass / Voted / Kernelized / Selective-sampling / Margin / Shifting / Budget / Tighter / Tightest) Perceptron, PAUM, RBP, ADALINE, MLP, LMNN |
 | semi-supervised classification | k-nearest neighbor, Radius neighbor, Label propagation, Label spreading, k-means, GMM, S3VM, Ladder network |
 | regression | Least squares, Ridge, Lasso, Elastic net, RLS, Bayesian linear, Poisson, Least absolute deviations, Huber, Tukey, Least trimmed squares, Least median squares, Lp norm linear, SMA, Deming, Segmented, LOWESS, spline, Gaussian process, Principal components, Partial least squares, Projection pursuit, Quantile regression, k-nearest neighbor, Radius neighbor, IDW, Nadaraya Watson, Priestley Chao, Gasser Muller, RBF Network, RVM, Decision tree, Random forest, Extra trees, GBDT, XGBoost, SVR, MLP, GMR, Isotonic, Ramer Douglas Peucker, Theil-Sen, Passing-Bablok, Repeated median |
 | interpolation | Nearest neighbor, IDW, (Spherical) Linear, Brahmagupta, Logarithmic, Cosine, (Inverse) Smoothstep, Cubic, (Centripetal) Catmull-Rom, Hermit, Polynomial, Lagrange, Trigonometric, Spline, RBF Network, Akima, Natural neighbor, Delaunay |
diff --git a/js/model_selector.js b/js/model_selector.js
@@ -184,6 +184,7 @@ const AIMethods = [
 				{ value: 'banditron', title: 'Banditron' },
 				{ value: 'ballseptron', title: 'Ballseptron' },
 				{ value: 'tighter_perceptron', title: 'Tighter Perceptron' },
+				{ value: 'tightest_perceptron', title: 'Tightest Perceptron' },
 				{ value: 'bsgd', title: 'BSGD' },
 			],
 			Netrowk: [
diff --git a/js/view/tightest_perceptron.js b/js/view/tightest_perceptron.js
@@ -0,0 +1,43 @@
+import TightestPerceptron from '../../lib/model/tightest_perceptron.js'
+import EnsembleBinaryModel from '../../lib/model/ensemble_binary.js'
+import Controller from '../controller.js'
+
+/**
+ * Sets up the Tightest Perceptron controls and step handler on the given platform.
+ *
+ * @param {object} platform Platform providing the settings, training data and result display
+ */
+export default function (platform) {
+	platform.setting.ml.usage = 'Click and add data point. Then, click "Step".'
+	const controller = new Controller(platform)
+	let model = null
+
+	// Builds the ensemble on the first step after a reset, then fits once and shows the predictions.
+	const fitStep = cb => {
+		if (model === null) {
+			model = new EnsembleBinaryModel(function () {
+				return new TightestPerceptron(budget.value, kernelType.value, lossType.value)
+			}, ensembleType.value)
+			const inputs = platform.trainInput
+			const labels = platform.trainOutput.map(row => row[0])
+			model.init(inputs, labels)
+		}
+		model.fit()
+
+		platform.testResult(model.predict(platform.testInput(3)))
+		if (cb) {
+			cb()
+		}
+	}
+	// Forgets the fitted model so the next step rebuilds it from the current settings.
+	const reset = () => {
+		model = null
+		platform.init()
+	}
+
+	const ensembleType = controller.select(['oneone', 'onerest'])
+	const kernelType = controller.select(['gaussian', 'polynomial'])
+	const budget = controller.input.number({ label: ' b ', min: 0, max: 1000, value: 10 })
+	const lossType = controller.select({ values: ['zero_one', 'hinge'], value: 'hinge' })
+	controller.stepLoopButtons().init(reset).step(fitStep).epoch()
+}
diff --git a/lib/model/tightest_perceptron.js b/lib/model/tightest_perceptron.js
@@ -0,0 +1,324 @@
+/**
+ * Natural logarithm of the gamma function.
+ *
+ * Integer and half-integer arguments are summed from their closed forms, other arguments below
+ * 0.5 use the reflection formula, and everything else uses the Lanczos approximation.
+ * NOTE(review): the closed-form branches look intended for positive arguments; non-positive
+ * integers and negative half-integers get no special handling - confirm callers never pass them.
+ *
+ * @param {number} z Argument
+ * @returns {number} ln(gamma(z))
+ */
+const logGamma = z => {
+	// https://en.wikipedia.org/wiki/Lanczos_approximation
+	// https://slpr.sakura.ne.jp/qp/gamma-function/
+	let x = 0
+	if (Number.isInteger(z)) {
+		// gamma(z) = (z - 1)!
+		for (let i = 2; i < z; i++) {
+			x += Math.log(i)
+		}
+	} else if (Number.isInteger(z - 0.5)) {
+		// gamma(n + 1/2) = (2n - 1)!! * sqrt(pi) / 2^n
+		const n = z - 0.5
+		x = Math.log(Math.sqrt(Math.PI)) - Math.log(2) * n
+		for (let i = 2 * n - 1; i > 0; i -= 2) {
+			x += Math.log(i)
+		}
+	} else if (z < 0.5) {
+		// Reflection formula: gamma(z) * gamma(1 - z) = pi / sin(pi * z)
+		x = Math.log(Math.PI) - Math.log(Math.sin(Math.PI * z)) - logGamma(1 - z)
+	} else {
+		const p = [
+			676.5203681218851, -1259.1392167224028, 771.32342877765313, -176.61502916214059, 12.507343278686905,
+			-0.13857109526572012, 9.9843695780195716e-6, 1.5056327351493116e-7,
+		]
+		z -= 1
+		x = 0.99999999999980993
+		for (let i = 0; i < p.length; i++) {
+			x += p[i] / (z + i + 1)
+		}
+		const t = z + p.length - 0.5
+		x = Math.log(Math.sqrt(2 * Math.PI)) + Math.log(t) * (z + 0.5) - t + Math.log(x)
+	}
+	return x
+}
+
+/**
+ * Beta function B(p, q) = gamma(p) * gamma(q) / gamma(p + q), evaluated through log-gamma.
+ *
+ * @param {number} p First parameter
+ * @param {number} q Second parameter
+ * @returns {number} B(p, q)
+ */
+const beta = (p, q) => {
+	// https://www2.math.kyushu-u.ac.jp/~snii/AdvancedCalculus/7-1.pdf
+	return Math.exp(logGamma(p) + logGamma(q) - logGamma(p + q))
+}
+
+/**
+ * Gauss hypergeometric function 2F1(a, b; c; z), summed as a power series in z.
+ *
+ * Every term is derived from the previous one by multiplying with the term ratio, so a negative
+ * or zero factor (for example a negative `b`) keeps its sign instead of turning into NaN.
+ *
+ * @param {number} a First upper parameter
+ * @param {number} b Second upper parameter
+ * @param {number} c Lower parameter
+ * @param {number} z Argument
+ * @returns {number} Sum of the series, truncated after at most 1000 terms beyond the leading 1
+ */
+const hypergeometric = (a, b, c, z) => {
+	// https://qiita.com/moriokumura/items/e35025d4ade312b0a017
+	let f = 1
+	let term = 1
+	for (let n = 0; n < 1000; n++) {
+		term *= ((a + n) * (b + n) * z) / ((c + n) * (1 + n))
+		f += term
+		// A term of exactly 0 ends the series; otherwise stop once the term is negligible.
+		if (term === 0 || Math.abs(term / f) < 1.0e-14) {
+			break
+		}
+	}
+	return f
+}
+
+/**
+ * Incomplete beta function B(z; a, b).
+ *
+ * Special cases use closed forms or recurrences; the general case falls back to the
+ * hypergeometric series.
+ *
+ * @param {number} z Upper limit of the integral
+ * @param {number} a First shape parameter
+ * @param {number} b Second shape parameter
+ * @returns {number} B(z; a, b)
+ */
+const incompleteBeta = (z, a, b) => {
+	// https://ja.wikipedia.org/wiki/%E4%B8%8D%E5%AE%8C%E5%85%A8%E3%83%99%E3%83%BC%E3%82%BF%E9%96%A2%E6%95%B0
+	// https://math-functions-1.watson.jp/sub1_spec_050.html#section030
+	// https://qiita.com/moriokumura/items/e35025d4ade312b0a017
+	if (b === 1) {
+		return z ** a / a
+	} else if (a === 1) {
+		return (1 - (1 - z) ** b) / b
+	} else if (a === 0.5 && b === 0) {
+		return 2 * Math.atanh(Math.sqrt(z))
+	} else if (a === 0.5 && b === 0.5) {
+		return 2 * Math.asin(Math.sqrt(z))
+	} else if (Number.isInteger(b)) {
+		// Recurrence over b, starting from the b = 1 closed form
+		const za = z ** a
+		let ib = za / a
+		for (let i = 1; i < b; i++) {
+			ib = (i * ib + za * (1 - z) ** i) / (a + i)
+		}
+		return ib
+	} else if (Number.isInteger(a)) {
+		// Recurrence over a, starting from the a = 1 closed form
+		const zb = (1 - z) ** b
+		let ib = (1 - zb) / b
+		for (let i = 1; i < a; i++) {
+			ib = (i * ib - z ** i * zb) / (i + b)
+		}
+		return ib
+	}
+	return (z ** a / a) * hypergeometric(a, 1 - b, a + 1, z)
+}
+
+/**
+ * Regularized incomplete beta function I_z(a, b) = B(z; a, b) / B(a, b).
+ *
+ * @param {number} z Upper limit of the integral
+ * @param {number} a First shape parameter
+ * @param {number} b Second shape parameter
+ * @returns {number} I_z(a, b)
+ */
+const regularizedIncompleteBeta = (z, a, b) => {
+	// beta distribution of the first kind
+	if (z === 0) {
+		return 0
+	} else if (z === 1) {
+		return 1
+	} else if (b === 1) {
+		return z ** a
+	} else if (a === 1) {
+		return 1 - (1 - z) ** b
+	}
+	return incompleteBeta(z, a, b) / beta(a, b)
+}
+
+/**
+ * Tightest Perceptron
+ */
+export default class TightestPerceptron {
+	// Online Learning: A Comprehensive Survey
+	// https://arxiv.org/abs/1802.02871
+	// Tighter perceptron with improved dual use of cached data for model representation and validation.
+	// https://www.dabi.temple.edu/external/vucetic/documents/wang09ijcnn.pdf
+	/**
+	 * @param {number} [b=10] Budget size
+	 * @param {'gaussian' | 'polynomial' | function (number[], number[]): number} [kernel=gaussian] Kernel name
+	 * @param {'zero_one' | 'hinge'} [accuracyLoss=hinge] Accuracy loss type name
+	 */
+	constructor(b = 10, kernel = 'gaussian', accuracyLoss = 'hinge') {
+		this._b = b
+
+		if (typeof kernel === 'function') {
+			this._kernel = kernel
+		} else {
+			switch (kernel) {
+				case 'gaussian':
+					// exp(-|a - b|^2 / s^2): the reduce already yields the squared distance
+					this._s = 1
+					this._kernel = (a, b) =>
+						Math.exp(-a.reduce((s, v, i) => s + (v - b[i]) ** 2, 0) / this._s ** 2)
+					break
+				case 'polynomial':
+					// (1 + a . b)^d: the initial value 0 keeps the first coordinate in the dot product
+					this._d = 2
+					this._kernel = (a, b) => (1 + a.reduce((s, v, i) => s + v * b[i], 0)) ** this._d
+					break
+			}
+		}
+
+		if (accuracyLoss === 'hinge') {
+			this._accuracyLossP = y => {
+				return Math.max(0, 1 - y)
+			}
+			this._accuracyLossN = y => {
+				return Math.max(0, 1 + y)
+			}
+		} else {
+			this._accuracyLossP = y => {
+				return y < 0 ? 1 : 0
+			}
+			this._accuracyLossN = y => {
+				return y < 0 ? 0 : 1
+			}
+		}
+		// Constants added to each support vector's cp / cn counts before weighting the losses
+		this._ap = 1
+		this._an = 1
+	}
+
+	/**
+	 * Initialize this model.
+	 *
+	 * @param {Array<Array<number>>} train_x Training data
+	 * @param {Array<1 | -1>} train_y Target values
+	 */
+	init(train_x, train_y) {
+		this._x = train_x
+		this._y = train_y
+
+		this._sv = []
+	}
+
+	/**
+	 * Fit model parameters.
+	 *
+	 * Makes one pass over the training data. A sample whose current output does not agree with
+	 * its label becomes a support vector; when that exceeds the budget, the support vector whose
+	 * removal gives the smallest weighted loss is dropped and its counts are merged into its
+	 * nearest neighbor.
+	 */
+	fit() {
+		for (let i = 0; i < this._x.length; i++) {
+			let s = 0
+			for (let k = 0; k < this._sv.length; k++) {
+				const sk = this._sv[k]
+				s += sk.y * this._kernel(this._x[i], sk.x)
+			}
+			if (s * this._y[i] <= 0) {
+				if (this._y[i] === 1) {
+					this._sv.push({ x: this._x[i], y: this._y[i], cp: 1, cn: 0 })
+				} else {
+					this._sv.push({ x: this._x[i], y: this._y[i], cp: 0, cn: 1 })
+				}
+				if (this._sv.length > this._b) {
+					// The loss weights do not depend on the removal candidate, so compute them once.
+					const wps = this._sv.map(
+						c => 1 - regularizedIncompleteBeta(0.5, c.cp + this._ap, c.cn + this._an)
+					)
+					let min_l = Infinity
+					let min_r = -1
+					for (let k = 0; k < this._sv.length; k++) {
+						let loss = 0
+						for (let j = 0; j < this._sv.length; j++) {
+							// Output at support vector j when candidate k is left out
+							let f = 0
+							for (let m = 0; m < this._sv.length; m++) {
+								if (m === k) {
+									continue
+								}
+								const sk = this._sv[m]
+								f += sk.y * this._kernel(this._sv[j].x, sk.x)
+							}
+							const lp = this._accuracyLossP(f)
+							const ln = this._accuracyLossN(f)
+							loss += wps[j] * lp + (1 - wps[j]) * ln
+						}
+						loss /= this._sv.length
+						if (loss < min_l) {
+							min_l = loss
+							min_r = k
+						}
+					}
+					const sv = this._sv.splice(min_r, 1)[0]
+					this._updateSummary(sv.x, sv.cp, sv.cn)
+				}
+			} else {
+				if (this._y[i] === 1) {
+					this._updateSummary(this._x[i], 1, 0)
+				} else {
+					this._updateSummary(this._x[i], 0, 1)
+				}
+			}
+		}
+	}
+
+	/**
+	 * Adds kernel-weighted counts to the support vector closest to the given point.
+	 *
+	 * @param {number[]} x Point whose counts are merged
+	 * @param {number} cp Positive count to add
+	 * @param {number} cn Negative count to add
+	 */
+	_updateSummary(x, cp, cn) {
+		if (this._sv.length === 0) {
+			return
+		}
+		let min_d = Infinity
+		let min_k = -1
+		for (let i = 0; i < this._sv.length; i++) {
+			const d = this._sv[i].x.reduce((s, v, j) => s + (v - x[j]) ** 2, 0)
+			if (d < min_d) {
+				min_d = d
+				min_k = i
+			}
+		}
+		const kn = this._kernel(x, this._sv[min_k].x)
+		this._sv[min_k].cp += cp * kn
+		this._sv[min_k].cn += cn * kn
+	}
+
+	/**
+	 * Returns predicted values.
+	 *
+	 * @param {Array<Array<number>>} data Sample data
+	 * @returns {(1 | -1)[]} Predicted values
+	 */
+	predict(data) {
+		const p = []
+		for (let i = 0; i < data.length; i++) {
+			let s = 0
+			for (let k = 0; k < this._sv.length; k++) {
+				const sk = this._sv[k]
+				s += sk.y * this._kernel(data[i], sk.x)
+			}
+			p[i] = s < 0 ? -1 : 1
+		}
+		return p
+	}
+}
diff --git a/tests/gui/view/tightest_perceptron.test.js b/tests/gui/view/tightest_perceptron.test.js
@@ -0,0 +1,60 @@
+import puppeteer from 'puppeteer'
+
+import { getPage } from '../helper/browser'
+
+describe('classification', () => {
+	/** @type {puppeteer.Page} */
+	let page
+	beforeEach(async () => {
+		page = await getPage()
+	}, 10000)
+
+	afterEach(async () => {
+		await page?.close()
+	})
+
+	// Checks the default values of the controls shown after selecting the model.
+	test('initialize', async () => {
+		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
+		// Await each selection so no promise is left floating and a failure is reported by this test.
+		await taskSelectBox.select('CF')
+		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
+		await modelSelectBox.select('tightest_perceptron')
+		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
+		const buttons = await methodMenu.waitForSelector('.buttons')
+
+		const methods = await buttons.waitForSelector('select:nth-of-type(1)')
+		await expect((await methods.getProperty('value')).jsonValue()).resolves.toBe('oneone')
+		const kernel = await buttons.waitForSelector('select:nth-of-type(2)')
+		await expect((await kernel.getProperty('value')).jsonValue()).resolves.toBe('gaussian')
+		const b = await buttons.waitForSelector('input:nth-of-type(1)')
+		await expect((await b.getProperty('value')).jsonValue()).resolves.toBe('10')
+		const aloss = await buttons.waitForSelector('select:nth-of-type(3)')
+		await expect((await aloss.getProperty('value')).jsonValue()).resolves.toBe('hinge')
+		const epoch = await buttons.waitForSelector('[name=epoch]')
+		await expect(epoch.evaluate(el => el.textContent)).resolves.toBe('0')
+	}, 10000)
+
+	// Runs a single step and checks that the epoch counter and the accuracy footer are updated.
+	test('learn', async () => {
+		const taskSelectBox = await page.waitForSelector('#ml_selector dl:first-child dd:nth-child(5) select')
+		await taskSelectBox.select('CF')
+		const modelSelectBox = await page.waitForSelector('#ml_selector .model_selection #mlDisp')
+		await modelSelectBox.select('tightest_perceptron')
+		const methodMenu = await page.waitForSelector('#ml_selector #method_menu')
+		const buttons = await methodMenu.waitForSelector('.buttons')
+
+		const epoch = await buttons.waitForSelector('[name=epoch]')
+		await expect(epoch.evaluate(el => el.textContent)).resolves.toBe('0')
+		const methodFooter = await page.waitForSelector('#method_footer')
+		await expect(methodFooter.evaluate(el => el.textContent)).resolves.toBe('')
+
+		const initButton = await buttons.waitForSelector('input[value=Initialize]')
+		await initButton.evaluate(el => el.click())
+		const stepButton = await buttons.waitForSelector('input[value=Step]:enabled')
+		await stepButton.evaluate(el => el.click())
+
+		await expect(epoch.evaluate(el => el.textContent)).resolves.toBe('1')
+		await expect(methodFooter.evaluate(el => el.textContent)).resolves.toMatch(/^Accuracy:[0-9.]+$/)
+	}, 10000)
+})
diff --git a/tests/lib/model/tightest_perceptron.test.js b/tests/lib/model/tightest_perceptron.test.js