Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
# [1.5.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.4.0...v1.5.0) (2024-07-05)


### Features

* git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))
- git clone depth limit in docker ([87767db](https://github.com/BuilderIO/gpt-crawler/commit/87767dbda99b3259d44ec2c02dceb3a59bb2ca3c))

# [1.4.0](https://github.com/BuilderIO/gpt-crawler/compare/v1.3.0...v1.4.0) (2024-01-15)

Expand Down
9 changes: 9 additions & 0 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ export const configSchema = z.object({
* @default ""
*/
exclude: z.string().or(z.array(z.string())).optional(),
/**
* Crawlee enqueue strategy: selects which of the links found on a page are
* followed, based on how each link's URL relates to the URL of the current page.
* @example "same-origin"
* @default "same-hostname"
* @see https://crawlee.dev/api/core/enum/EnqueueStrategy
*/
crawlStrategy: z
.enum(["all", "same-origin", "same-hostname", "same-domain"])
.optional(),
/**
* Selector to grab the inner text from
* @example ".docs-builder-container"
Expand Down
6 changes: 5 additions & 1 deletion src/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ export async function crawl(config: Config) {
exclude:
typeof config.exclude === "string"
? [config.exclude]
: config.exclude ?? [],
: (config.exclude ?? []),
strategy:
typeof config.crawlStrategy === "string"
? config.crawlStrategy
: undefined,
});
},
// Comment this option to scrape the full website.
Expand Down