diff --git a/README.md b/README.md index 4a4b63e..a4632df 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,84 @@ # 🏛️ wayback.js The Wayback Availability JSON API in JavaScript. -## 🚀 Usage +## 🚀 Installation ### npm ```shell npm i wayback.js ``` +## 📦 Usage +### Create an Instance +```js +import Wayback from 'wayback.js'; + +const wb = new Wayback({ + connectionTimeoutMs: 9000, // Timeout in ms + cacheTTL: 86400, // Cache time-to-live in seconds + gcInterval: 3600, // Garbage collection interval in seconds + headers: { ... } // Optional headers +}); +``` + +### Check if a URL is Archived +```js +// Default check +const archived = await wb.isArchived('https://example.com'); +console.log(archived); + +// With options +const archivedOldest = await wb.isArchived('https://example.com', { + resolveRedirects: false, // Skip resolving redirects + oldestArchive: true // Request the oldest available archive +}); +console.log(archivedOldest); +``` + +### Save a URL +```js +const saved = await wb.saveUrl('https://example.com'); +console.log(saved); +``` + +### Save Only If Outdated +```js +// Default 30 days max age +const savedIfOld = await wb.saveOutdatedUrl('https://example.com'); +console.log(savedIfOld); + +// With positional arguments +const savedIfOldest = await wb.saveOutdatedUrl( + 'https://example.com', + 90, // maxAgeDays: maximum age (in days) + false // resolveRedirects: skip resolving redirects +); +console.log(savedIfOldest); +``` + +### Resolve Redirects Before Archiving +```js +const finalUrl = await wb.getFinalRedirectUrl('https://g.co/gsoc'); +console.log(finalUrl); +``` + +## 📚 API Reference +| Method | Description | +| ------------------------------ | ------------------------------------------------------------------ | +| `isArchived` | Checks if the given URL is archived and returns snapshot info. | +| `saveUrl` | Saves the given URL to the Wayback Machine. | +| `saveOutdatedUrl` | Saves the URL only if the last archive is older than `maxAgeDays`. 
| +| `getFinalRedirectUrl` | Resolves redirects and returns the final destination URL. | + +### Options +**`isArchived` Options:** +* **resolveRedirects** *(boolean)* — whether to follow redirects before checking archive (default: `true`) +* **oldestArchive** *(boolean)* — if `true`, retrieves the oldest available snapshot instead of the latest (default: `false`) + +**`saveOutdatedUrl` Parameters (positional, after `url`):** +* **maxAgeDays** *(number)* — maximum age (in days) of the last archive before re-saving (default: `30`) +* **resolveRedirects** *(boolean)* — whether to follow redirects before checking archive age (default: `true`) + + ## ℹ️ Info ### Dependents This package is a dependency of: diff --git a/src/wayback.js b/src/wayback.js index c65427c..be223ef 100644 --- a/src/wayback.js +++ b/src/wayback.js @@ -35,18 +35,28 @@ class Wayback { Wayback.#registry.unregister(this); } - async isArchived(url, resolveRedirects = true) { + async isArchived(url, options = {}) { + const opts = { + resolveRedirects: true, + oldestArchive: false, + ...options + }; + const cachedEntry = this.#getCache(url); if (cachedEntry) { return cachedEntry; } try { - if (resolveRedirects) { + if (opts.resolveRedirects) { url = await this.getFinalRedirectUrl(url); } - const response = await this.#fetch(`${this.#baseApiUrl}?timestamp=${Wayback.currentTimestamp()}&url=${encodeURIComponent(url.replace(/^https?:\/\//, ''))}`, { method: 'GET' }); + const timestamp = opts.oldestArchive + ? 
'19950301190227' // March 1, 1995 at 19:02:27 UTC + : Wayback.currentTimestamp(); + + const response = await this.#fetch(`${this.#baseApiUrl}?timestamp=${timestamp}&url=${encodeURIComponent(url.replace(/^https?:\/\//, ''))}`, { method: 'GET' }); if (!response) { return null; } @@ -100,7 +110,7 @@ class Wayback { } async saveOutdatedUrl(url, maxAgeDays = 30, resolveRedirects = true) { - const snapshot = await this.isArchived(url, resolveRedirects); + const snapshot = await this.isArchived(url, { resolveRedirects }); if (!snapshot || !snapshot.timestamp) { return this.saveUrl(url); } diff --git a/tests/wayback.test.js b/tests/wayback.test.js index 8a0dbe4..965fd5f 100644 --- a/tests/wayback.test.js +++ b/tests/wayback.test.js @@ -28,6 +28,19 @@ describe('Wayback class tests', function () { assert.deepStrictEqual(result, { url: 'http://example.com/123', timestamp: '20220101' }); }); + it('should return oldest archived snapshot', async function () { + global.fetch = async () => ( + { + ok: true, + json: async () => ({ + archived_snapshots: { closest: { url: 'http://web.archive.org/web/20020120142510/http://example.com:80/', timestamp: '20020120142510' } } + }) + }); + + const result = await wayback.isArchived('http://example.com', { oldestArchive: true }); + assert.deepStrictEqual(result, { url: 'http://web.archive.org/web/20020120142510/http://example.com:80/', timestamp: '20020120142510' }); + }); + it('should return HTTP status 200 if URL is not archived', async function () { global.fetch = async () => ({ ok: true,