Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 104 additions & 17 deletions src/views/CrawlView.vue
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@
Allow External Links
</label>
<label class="checkbox-label">
<input type="checkbox" v-model="formData.crawlerOptions.navigateBacklinks" />
Navigate Backlinks
<input type="checkbox" v-model="formData.crawlerOptions.allowBackwardLinks" />
Allow Backward Links
</label>
</div>
</div>
Expand Down Expand Up @@ -174,6 +174,15 @@
min="0"
/>
</div>
<div class="form-group">
<label for="maxAge">Cache Max Age (ms):</label>
<input
id="maxAge"
v-model.number="formData.scrapeOptions.maxAge"
type="number"
min="0"
/>
</div>
<label class="checkbox-label">
<input type="checkbox" v-model="formData.scrapeOptions.skipTlsVerification" />
Skip TLS Verification
Expand All @@ -182,6 +191,14 @@
<input type="checkbox" v-model="formData.scrapeOptions.blockAds" />
Block Ads
</label>
<label class="checkbox-label">
<input type="checkbox" v-model="formData.scrapeOptions.parsePDF" />
Parse PDF
</label>
<label class="checkbox-label">
<input type="checkbox" v-model="formData.scrapeOptions.storeInCache" />
Store in Cache
</label>
<div class="form-group">
<label for="proxy">Proxy:</label>
<select id="proxy" v-model="formData.scrapeOptions.proxy">
Expand Down Expand Up @@ -231,6 +248,15 @@
v-model="formData.scrapeOptions.jsonOptions.prompt"
></textarea>
</div>
<div class="form-group">
<label for="actions">Actions (JSON):</label>
<textarea
id="actions"
v-model="actionsInput"
@blur="parseActions"
placeholder='[{"type":"wait","milliseconds":1000}]'
></textarea>
</div>
<div class="form-group">
<label for="changeModes">Change Tracking Modes:</label>
<input
Expand Down Expand Up @@ -335,6 +361,7 @@
</div>
<p>{{ progress }}% Completed</p>
<p>{{ pagesCompleted }} / {{ totalPages }} pages processed</p>
<button class="primary-button" type="button" @click="cancelActiveCrawl">Cancel Crawl</button>
</div>

<!-- Section for download options after crawl completion -->
Expand Down Expand Up @@ -455,16 +482,16 @@ import {
* Interface for Crawler Options section of the form.
*/
interface CrawlerOptions {
includes?: string[];
excludes?: string[];
includePaths?: string[];
excludePaths?: string[];
maxDepth?: number;
maxDiscoveryDepth?: number;
ignoreSitemap?: boolean;
ignoreQueryParameters?: boolean;
limit?: number;
delay?: number;
allowExternalLinks?: boolean;
navigateBacklinks?: boolean;
allowBackwardLinks?: boolean;
}

/**
Expand All @@ -481,8 +508,10 @@ interface ScrapeOptions {
mobile?: boolean;
removeBase64Images?: boolean;
actions?: any[]; // Based on OpenAPI, actions is an array of Action objects
maxAge?: number;
skipTlsVerification?: boolean;
timeout?: number;
parsePDF?: boolean;
jsonOptions?: {
schema?: object;
systemPrompt?: string;
Expand All @@ -492,6 +521,7 @@ interface ScrapeOptions {
country?: string;
languages?: string[];
};
storeInCache?: boolean;
blockAds?: boolean;
proxy?: string;
changeTrackingOptions?: {
Expand Down Expand Up @@ -544,31 +574,34 @@ export default defineComponent({
const formData = ref<FormData>({
url: '',
crawlerOptions: {
includes: [],
excludes: [],
includePaths: [],
excludePaths: [],
maxDepth: undefined,
maxDiscoveryDepth: undefined,
ignoreSitemap: false,
ignoreQueryParameters: false,
limit: undefined,
delay: undefined,
allowExternalLinks: false,
navigateBacklinks: false,
allowBackwardLinks: false,
},
scrapeOptions: {
formats: ['markdown'],
onlyMainContent: true,
includeTags: [],
excludeTags: [],
headers: {},
maxAge: 0,
waitFor: undefined,
mobile: false,
removeBase64Images: false,
actions: [],
parsePDF: true,
skipTlsVerification: false,
timeout: undefined,
jsonOptions: {},
location: { country: undefined, languages: [] },
storeInCache: true,
blockAds: true,
proxy: undefined,
changeTrackingOptions: {},
Expand All @@ -592,6 +625,7 @@ export default defineComponent({
const jsonOptionsSchemaInput = ref('');
const changeTrackingSchemaInput = ref('');
const changeTrackingModesInput = ref('');
const actionsInput = ref('');

// State for collapsible sections
const isCrawlerOptionsCollapsed = ref(true);
Expand All @@ -605,21 +639,21 @@ export default defineComponent({

/**
* Parses a comma-separated string of regex patterns from the includes input
* and updates the formData.crawlerOptions.includes array.
* and updates the formData.crawlerOptions.includePaths array.
*/
const parseIncludes = () => {
formData.value.crawlerOptions.includes = includesInput.value
formData.value.crawlerOptions.includePaths = includesInput.value
.split(',')
.map((s) => s.trim())
.filter(Boolean);
};

/**
* Parses a comma-separated string of regex patterns from the excludes input
* and updates the formData.crawlerOptions.excludes array.
* and updates the formData.crawlerOptions.excludePaths array.
*/
const parseExcludes = () => {
formData.value.crawlerOptions.excludes = excludesInput.value
formData.value.crawlerOptions.excludePaths = excludesInput.value
.split(',')
.map((s) => s.trim())
.filter(Boolean);
Expand Down Expand Up @@ -731,6 +765,20 @@ export default defineComponent({
.filter(Boolean);
};

/**
 * Parses the actionsInput string as JSON and updates
 * formData.scrapeOptions.actions.
 *
 * Blank input (empty or whitespace-only) resets the actions to an empty array.
 * If the text is not valid JSON, or is valid JSON but not an array, an error
 * message is written to `error` and the previously stored actions are left
 * unchanged.
 */
const parseActions = () => {
  // Trim first so a textarea containing only whitespace counts as "no actions"
  // instead of being handed to JSON.parse, which would throw on it.
  const raw = (actionsInput.value ?? '').trim();
  if (!raw) {
    formData.value.scrapeOptions.actions = [];
    return;
  }
  try {
    const parsed: unknown = JSON.parse(raw);
    // `actions` is declared as an array; reject objects, strings, numbers, etc.
    // so a non-array value can never reach the request payload.
    if (!Array.isArray(parsed)) {
      error.value = 'Invalid JSON for actions: expected a JSON array';
      return;
    }
    formData.value.scrapeOptions.actions = parsed;
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e);
    error.value = `Invalid JSON for actions: ${detail}`;
  }
};

const loading = ref(false);
const crawling = ref(false);
const progress = ref(0);
Expand Down Expand Up @@ -818,8 +866,8 @@ export default defineComponent({
// Sync text inputs with values from the selected crawl if available
const crawl = crawlHistory.value.find((c) => c.id === id);
if (crawl && crawl.crawlerOptions) {
includesInput.value = (crawl.crawlerOptions.includes || []).join(', ');
excludesInput.value = (crawl.crawlerOptions.excludes || []).join(', ');
includesInput.value = (crawl.crawlerOptions.includePaths || []).join(', ');
excludesInput.value = (crawl.crawlerOptions.excludePaths || []).join(', ');
}
if (crawl && crawl.scrapeOptions) {
includeTagsInput.value = (crawl.scrapeOptions.includeTags || []).join(', ');
Expand All @@ -833,6 +881,9 @@ export default defineComponent({
: '';
changeTrackingModesInput.value =
crawl.scrapeOptions.changeTrackingOptions?.modes?.join(', ') || '';
actionsInput.value = crawl.scrapeOptions.actions
? JSON.stringify(crawl.scrapeOptions.actions)
: '';
}
} catch (err: any) {
console.error(`Error fetching crawl files for ID ${id}:`, err);
Expand Down Expand Up @@ -1146,6 +1197,7 @@ export default defineComponent({
parseJsonOptionsSchema();
parseChangeTrackingSchema();
parseChangeTrackingModes();
parseActions();
parseWebhookHeaders();
parseWebhookMetadata();

Expand All @@ -1166,20 +1218,23 @@ export default defineComponent({
const payload: any = { url: formData.value.url };

const crawler = formData.value.crawlerOptions;
if (crawler.excludes && crawler.excludes.length > 0) payload.excludePaths = crawler.excludes;
if (crawler.includes && crawler.includes.length > 0) payload.includePaths = crawler.includes;
if (crawler.excludePaths && crawler.excludePaths.length > 0)
payload.excludePaths = crawler.excludePaths;
if (crawler.includePaths && crawler.includePaths.length > 0)
payload.includePaths = crawler.includePaths;
if (crawler.maxDepth !== undefined) payload.maxDepth = crawler.maxDepth;
if (crawler.maxDiscoveryDepth !== undefined)
payload.maxDiscoveryDepth = crawler.maxDiscoveryDepth;
if (crawler.ignoreSitemap) payload.ignoreSitemap = true;
if (crawler.ignoreQueryParameters) payload.ignoreQueryParameters = true;
if (crawler.limit !== undefined) payload.limit = crawler.limit;
if (crawler.delay !== undefined) payload.delay = crawler.delay;
if (crawler.navigateBacklinks) payload.allowBackwardLinks = true;
if (crawler.allowBackwardLinks) payload.allowBackwardLinks = true;
if (crawler.allowExternalLinks) payload.allowExternalLinks = true;

const scrape = formData.value.scrapeOptions;
const scrapePayload: any = {};
if (scrape.maxAge !== undefined && scrape.maxAge > 0) scrapePayload.maxAge = scrape.maxAge;
if (scrape.formats && scrape.formats.length > 0) scrapePayload.formats = scrape.formats;
// Only include onlyMainContent if it's explicitly false, as default is true
if (scrape.onlyMainContent === false) scrapePayload.onlyMainContent = false;
Expand All @@ -1193,6 +1248,7 @@ export default defineComponent({
if (scrape.mobile) scrapePayload.mobile = true;
if (scrape.removeBase64Images) scrapePayload.removeBase64Images = true;
if (scrape.actions && scrape.actions.length > 0) scrapePayload.actions = scrape.actions;
if (scrape.parsePDF === false) scrapePayload.parsePDF = false;
if (scrape.skipTlsVerification) scrapePayload.skipTlsVerification = true;
if (scrape.timeout !== undefined) scrapePayload.timeout = scrape.timeout;
if (scrape.jsonOptions) {
Expand All @@ -1210,6 +1266,7 @@ export default defineComponent({
loc.languages = scrape.location.languages;
if (Object.keys(loc).length > 0) scrapePayload.location = loc;
}
if (scrape.storeInCache === false) scrapePayload.storeInCache = false;
// Only include blockAds if it's explicitly false, as default is true
if (scrape.blockAds === false) scrapePayload.blockAds = false;
if (scrape.proxy) scrapePayload.proxy = scrape.proxy;
Expand Down Expand Up @@ -1361,6 +1418,33 @@ export default defineComponent({
}, 1000); // Poll every 1 second
};

/**
 * Cancel the active crawl job.
 *
 * Does nothing when the current `result` has no id. Otherwise it asks the API
 * to cancel that crawl and, on success, clears the interval held in
 * `intervalId` (if any), sets `crawling` to false and `crawlStatus` to
 * 'cancelled', and marks the matching `crawlHistory` entry as 'cancelled'
 * before calling `saveHistory()`. If the request fails, the failure is
 * reported through `error` and no state is changed.
 *
 * @returns A promise that resolves when the cancellation request finishes.
 */
const cancelActiveCrawl = async (): Promise<void> => {
  // Capture the id once: `result` is reactive state that may be cleared or
  // replaced while the request is awaited, and re-reading it afterwards could
  // throw on null or update the wrong history entry.
  const crawlId = result.value?.id;
  if (!crawlId) {
    return;
  }
  try {
    await api.crawling.cancelCrawl(crawlId);
    if (intervalId) {
      clearInterval(intervalId);
      intervalId = null;
    }
    crawling.value = false;
    crawlStatus.value = 'cancelled';
    const historyItem = crawlHistory.value.find((c) => c.id === crawlId);
    if (historyItem) {
      historyItem.status = 'cancelled';
      saveHistory();
    }
  } catch (err: any) {
    error.value = `Failed to cancel crawl: ${err.message || err}`;
  }
};

onMounted(() => {
// Load history from LocalStorage on component mount
const savedHistory = localStorage.getItem(HISTORY_STORAGE_KEY);
Expand Down Expand Up @@ -1395,6 +1479,7 @@ export default defineComponent({
jsonOptionsSchemaInput,
changeTrackingSchemaInput,
changeTrackingModesInput,
actionsInput,
parseIncludes,
parseExcludes,
parseIncludeTags,
Expand All @@ -1403,10 +1488,12 @@ export default defineComponent({
parseJsonOptionsSchema,
parseChangeTrackingSchema,
parseChangeTrackingModes,
parseActions,
parseWebhookHeaders,
parseWebhookMetadata,
loading,
crawling,
cancelActiveCrawl,
progress,
pagesCompleted,
totalPages,
Expand Down