Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,8 @@
},
"dependencies": {
"axios": "^0.17.1",
"he": "^1.1.1",
"lodash": "^4.17.4",
"striptags": "^3.1.0"
"xml2json": "^0.11.2"

Check warning on line 47 in package.json

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

package.json#L47

Package dependencies with variant versions may lead to dependency hijack and confusion attacks.
},
"ava": {
"babel": "inherit",
Expand Down
85 changes: 54 additions & 31 deletions src/index.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,47 @@
/* @flow */

import he from 'he';
import axios from 'axios';
import { find } from 'lodash';
import striptags from 'striptags';
import parser from 'xml2json';

/**
 * Returns a shallow copy of a parsed caption node in which the `$t` property
 * is exposed as `text` instead. All other properties are carried over
 * unchanged and the input object is not mutated.
 */
const renameTextProp = node => {
  const { $t: text, ...attributes } = node;
  return { ...attributes, text };
};

/**
 * Fallback caption lookup that queries the `video.google.com/timedtext`
 * endpoint directly: first the list of available caption tracks, then the
 * captions for the requested language.
 *
 * @param {string} videoID - ID of the video whose captions are requested.
 * @param {string} lang - Language code that must match a track's `lang_code`.
 * @returns {Promise<Array<Object>>} One object per caption node, with the
 *   node's `$t` property renamed to `text`.
 * @throws {Error} When the video exposes no caption tracks, when `lang` is
 *   not among the available languages, or when the caption document holds
 *   no caption nodes.
 */
async function alternativeCaptionsRetrieval(videoID, lang) {
  // Safe property read: the parser may hand back a string (empty element)
  // or nothing at all where an object is expected.
  const pick = (node, key) =>
    node !== null && typeof node === 'object' ? node[key] : undefined;
  // xml2json returns a lone child element as a plain object instead of a
  // one-element array, so always normalise before iterating.
  const asList = value => (value == null ? [] : [].concat(value));

  // Both values come from the caller; percent-encode them so they cannot
  // inject extra query parameters or otherwise reshape the request URL.
  const baseUrl = `https://video.google.com/timedtext?v=${encodeURIComponent(
    videoID
  )}`;

  const { data: trackListXml } = await axios.get(`${baseUrl}&type=list`);
  const trackList = parser.toJson(trackListXml, { object: true });
  const tracks = asList(pick(pick(trackList, 'transcript_list'), 'track'));
  if (tracks.length === 0) {
    throw new Error(`Could not find captions for video: ${videoID}`);
  }

  const availableLanguages = tracks.map(track => track.lang_code);
  if (!availableLanguages.includes(lang)) {
    throw new Error(
      `Could not find ${lang} captions. Available languages: ${availableLanguages.join(
        ', '
      )}.`
    );
  }

  const { data: captionsXml } = await axios.get(
    `${baseUrl}&lang=${encodeURIComponent(lang)}`
  );
  const captions = parser.toJson(captionsXml, { object: true });
  const captionNodes = asList(pick(pick(captions, 'transcript'), 'text'));
  if (captionNodes.length === 0) {
    throw new Error(`Could not find captions for video: ${videoID}`);
  }

  return captionNodes.map(renameTextProp);
}

export async function getSubtitles({
videoID,
Expand All @@ -18,9 +56,13 @@

const decodedData = decodeURIComponent(data);

// * ensure we have access to captions data
if (!decodedData.includes('captionTracks'))
throw new Error(`Could not find captions for video: ${videoID}`);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this error need to continue existing if there's nothing alternative found?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I check and throw from this alternative method, but it might be a good thing to check here as well to reduce coupling

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bel0v how to avoid throwing error at all?

if (!decodedData.includes('captionTracks')) {
const alternativeCaptions = await alternativeCaptionsRetrieval(
videoID,
lang
);
return alternativeCaptions;
}

const regex = /({"captionTracks":.*isTranslatable":(true|false)}])/;
const [match] = regex.exec(decodedData);
Expand All @@ -40,32 +82,13 @@
throw new Error(`Could not find ${lang} captions for ${videoID}`);

const { data: transcript } = await axios.get(subtitle.baseUrl);
const lines = transcript
.replace('<?xml version="1.0" encoding="utf-8" ?><transcript>', '')
.replace('</transcript>', '')
.split('</text>')
.filter(line => line && line.trim())
.map(line => {
const startRegex = /start="([\d.]+)"/;
const durRegex = /dur="([\d.]+)"/;

const [, start] = startRegex.exec(line);
const [, dur] = durRegex.exec(line);

const htmlText = line
.replace(/<text.+>/, '')
.replace(/&amp;/gi, '&')
.replace(/<\/?[^>]+(>|$)/g, '');

const decodedText = he.decode(htmlText);
const text = striptags(decodedText);

return {
start,
dur,
text,
};
});
const transcriptParsed = parser.toJson(transcript, { object: true });
let lines = [];
try {
lines = transcriptParsed.transcript.text.map(renameTextProp);
} catch (error) {
throw new Error(`Could not find captions for ${videoID}`);
}

return lines;
}
10 changes: 10 additions & 0 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,13 @@ test('Extract passive income video', async t => {
const subtitles = await getSubtitles({ videoID: 'JueUvj6X3DA' });
t.deepEqual('creating passive income takes work but', subtitles[0].text);
});

// Checks the first subtitle line returned for a video whose captions are,
// per the test title, not listed in captionTracks. Calls getSubtitles with
// only a videoID and compares the first returned entry field by field.
test('Try capturing subtitles not listed in captionTracks', async t => {
  const lines = await getSubtitles({ videoID: '62xdACKITrE' });
  const firstLine = lines[0];
  const expectedFirstLine = {
    start: '11.8',
    dur: '2.9',
    text:
      'Ein Flugzeug liegt im Abendwind\nA plane is flying on the evening winds',
  };
  t.deepEqual(firstLine, expectedFirstLine);
});
85 changes: 77 additions & 8 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,13 @@
buffers "~0.1.1"
chainsaw "~0.1.0"

bindings@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/bindings/-/bindings-1.5.0.tgz#10353c9e945334bc0511a6d90b38fbc7c9c504df"
integrity sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==
dependencies:
file-uri-to-path "1.0.0"

block-stream@*:
version "0.0.9"
resolved "https://registry.yarnpkg.com/block-stream/-/block-stream-0.0.9.tgz#13ebfe778a03205cfe03751481ebb4b3300c126a"
Expand Down Expand Up @@ -1933,6 +1940,11 @@
flat-cache "^1.2.1"
object-assign "^4.0.1"

file-uri-to-path@1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz#553a7b8446ff6f684359c445f1e37a05dacc33dd"
integrity sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==

filename-regex@^2.0.0:
version "2.0.1"
resolved "https://registry.yarnpkg.com/filename-regex/-/filename-regex-2.0.1.tgz#c1c4b9bee3e09725ddb106b75c1e301fe2f18b26"
Expand Down Expand Up @@ -2334,10 +2346,6 @@
hoek "4.x.x"
sntp "2.x.x"

he@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/he/-/he-1.1.1.tgz#93410fd21b009735151f8868c2f271f3427e23fd"

hoek@2.x.x:
version "2.16.3"
resolved "https://registry.yarnpkg.com/hoek/-/hoek-2.16.3.tgz#20bb7403d3cea398e91dc4710a8ff1b8274a25ed"
Expand All @@ -2346,6 +2354,21 @@
version "4.2.0"
resolved "https://registry.yarnpkg.com/hoek/-/hoek-4.2.0.tgz#72d9d0754f7fe25ca2d01ad8f8f9a9449a89526d"

hoek@5.x.x:

Check warning on line 2357 in yarn.lock

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

yarn.lock#L2357

Insecure dependency npm/hoek@5.0.4 (CVE-2020-36604: hapi/hoek: Prototype Pollution in @hapi/hoek) (no fix available)
version "5.0.4"
resolved "https://registry.yarnpkg.com/hoek/-/hoek-5.0.4.tgz#0f7fa270a1cafeb364a4b2ddfaa33f864e4157da"
integrity sha512-Alr4ZQgoMlnere5FZJsIyfIjORBqZll5POhDsF4q64dPuJR6rNxXdDxtHSQq8OXRurhmx+PWYEE8bXRROY8h0w==

hoek@6.x.x:

Check warning on line 2362 in yarn.lock

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

yarn.lock#L2362

Insecure dependency npm/hoek@6.1.3 (CVE-2020-36604: hapi/hoek: Prototype Pollution in @hapi/hoek) (no fix available)
version "6.1.3"
resolved "https://registry.yarnpkg.com/hoek/-/hoek-6.1.3.tgz#73b7d33952e01fe27a38b0457294b79dd8da242c"
integrity sha512-YXXAAhmF9zpQbC7LEcREFtXfGq5K1fmd+4PHkBq8NUqmzW3G+Dq10bI/i0KucLRwss3YYFQ0fSfoxBZYiGUqtQ==

hoek@^4.2.1:

Check warning on line 2367 in yarn.lock

View check run for this annotation

Codacy Production / Codacy Static Code Analysis

yarn.lock#L2367

Insecure dependency npm/hoek@4.2.1 (CVE-2020-36604: hapi/hoek: Prototype Pollution in @hapi/hoek) (no fix available)
version "4.2.1"
resolved "https://registry.yarnpkg.com/hoek/-/hoek-4.2.1.tgz#9634502aa12c445dd5a7c5734b572bb8738aacbb"
integrity sha512-QLg82fGkfnJ/4iy1xZ81/9SIJiq1NGFUMGs6ParyjBZr6jW2Ufj/snDqTHixNlHdPNwN2RLVD0Pi3igeK9+JfA==

home-or-tmp@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/home-or-tmp/-/home-or-tmp-2.0.0.tgz#e36c3f2d2cae7d746a857e38d18d5f32a7882db8"
Expand Down Expand Up @@ -2668,6 +2691,13 @@
version "1.0.0"
resolved "https://registry.yarnpkg.com/isarray/-/isarray-1.0.0.tgz#bb935d48582cba168c06834957a54a3e07124f11"

isemail@3.x.x:
version "3.2.0"
resolved "https://registry.yarnpkg.com/isemail/-/isemail-3.2.0.tgz#59310a021931a9fb06bbb51e155ce0b3f236832c"
integrity sha512-zKqkK+O+dGqevc93KNsbZ/TqTUFd46MwWjYOoMrjIMZ51eU7DtQG3Wmd9SQQT7i7RVnuTPEiYEWHU3MSbxC1Tg==
dependencies:
punycode "2.x.x"

isexe@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/isexe/-/isexe-2.0.0.tgz#e8fbf374dc556ff8947a10dcb0572d633f2cfa10"
Expand All @@ -2693,6 +2723,15 @@
version "21.2.0"
resolved "https://registry.yarnpkg.com/jest-docblock/-/jest-docblock-21.2.0.tgz#51529c3b30d5fd159da60c27ceedc195faf8d414"

joi@^13.1.2:
version "13.7.0"
resolved "https://registry.yarnpkg.com/joi/-/joi-13.7.0.tgz#cfd85ebfe67e8a1900432400b4d03bbd93fb879f"
integrity sha512-xuY5VkHfeOYK3Hdi91ulocfuFopwgbSORmIwzcwHKESQhC7w1kD5jaVSPnqDxS2I8t3RZ9omCKAxNwXN5zG1/Q==
dependencies:
hoek "5.x.x"
isemail "3.x.x"
topo "3.x.x"

js-string-escape@^1.0.1:
version "1.0.1"
resolved "https://registry.yarnpkg.com/js-string-escape/-/js-string-escape-1.0.1.tgz#e2625badbc0d67c7533e9edc1068c587ae4137ef"
Expand Down Expand Up @@ -3059,6 +3098,11 @@
version "0.0.7"
resolved "https://registry.yarnpkg.com/mute-stream/-/mute-stream-0.0.7.tgz#3075ce93bc21b8fab43e1bc4da7e8115ed1e7bab"

nan@^2.13.2:
version "2.14.0"
resolved "https://registry.yarnpkg.com/nan/-/nan-2.14.0.tgz#7818f722027b2459a86f0295d434d1fc2336c52c"
integrity sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg==

nan@^2.3.0:
version "2.8.0"
resolved "https://registry.yarnpkg.com/nan/-/nan-2.8.0.tgz#ed715f3fe9de02b57a5e6252d90a96675e1f085a"
Expand All @@ -3071,6 +3115,14 @@
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"

node-expat@^2.3.15:
version "2.3.18"
resolved "https://registry.yarnpkg.com/node-expat/-/node-expat-2.3.18.tgz#d9e6949cecda15e131f14259b73dc7b9ed7bc560"
integrity sha512-9dIrDxXePa9HSn+hhlAg1wXkvqOjxefEbMclGxk2cEnq/Y3U7Qo5HNNqeo3fQ4bVmLhcdt3YN1TZy7WMZy4MHw==
dependencies:
bindings "^1.5.0"
nan "^2.13.2"

node-pre-gyp@^0.6.39:
version "0.6.39"
resolved "https://registry.yarnpkg.com/node-pre-gyp/-/node-pre-gyp-0.6.39.tgz#c00e96860b23c0e1420ac7befc5044e1d78d8649"
Expand Down Expand Up @@ -3451,6 +3503,11 @@
setimmediate ">= 1.0.2 < 2"
slice-stream ">= 1.0.0 < 2"

punycode@2.x.x:
version "2.1.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==

punycode@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-1.4.1.tgz#c0d5a63b2718800ad8e1eb0fa5269c84dd41845e"
Expand Down Expand Up @@ -3994,10 +4051,6 @@
version "2.0.1"
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a"

striptags@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/striptags/-/striptags-3.1.0.tgz#763e534338d9cf542f004a4b1eb099e32d295e44"

supertap@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/supertap/-/supertap-1.0.0.tgz#bd9751c7fafd68c68cf8222a29892206a119fa9e"
Expand Down Expand Up @@ -4107,6 +4160,13 @@
version "2.0.0"
resolved "https://registry.yarnpkg.com/to-fast-properties/-/to-fast-properties-2.0.0.tgz#dc5e698cbd079265bc73e0377681a4e4e83f616e"

topo@3.x.x:
version "3.0.3"
resolved "https://registry.yarnpkg.com/topo/-/topo-3.0.3.tgz#d5a67fb2e69307ebeeb08402ec2a2a6f5f7ad95c"
integrity sha512-IgpPtvD4kjrJ7CRA3ov2FhWQADwv+Tdqbsf1ZnPUSAtCJ9e1Z44MmoSGDXGk4IppoZA7jd/QRkNddlLJWlUZsQ==
dependencies:
hoek "6.x.x"

tough-cookie@~2.3.0, tough-cookie@~2.3.3:
version "2.3.3"
resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.3.3.tgz#0b618a5565b6dea90bf3425d04d55edc475a7561"
Expand Down Expand Up @@ -4341,6 +4401,15 @@
version "3.0.0"
resolved "https://registry.yarnpkg.com/xdg-basedir/-/xdg-basedir-3.0.0.tgz#496b2cc109eca8dbacfe2dc72b603c17c5870ad4"

xml2json@^0.11.2:
version "0.11.2"
resolved "https://registry.yarnpkg.com/xml2json/-/xml2json-0.11.2.tgz#70ddd234fd7818312cc58455cab8457b5bcc7c52"
integrity sha512-ZJpHpPOL0T5lOvAHMnWm59iQOPqNtam5t2TMUllWZ1k5Wm8L5YyvQnkeaVnRKCvDwY5EumqXWyOjjMdQVz272A==
dependencies:
hoek "^4.2.1"
joi "^13.1.2"
node-expat "^2.3.15"

xtend@^4.0.0, xtend@~4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.1.tgz#a5c6d532be656e23db820efb943a1f04998d63af"
Expand Down