Skip to content

Commit c7161f2

Browse files
committed
[components] Scrapeless - new actions
- Introduced a new README.md for Scrapeless, outlining its features and getting started guide. - Implemented multiple Scrapeless actions, including `Crawler`, `Scraping API`, and `Universal Scraping API`.
1 parent 8616b6d commit c7161f2

File tree

7 files changed

+1614
-0
lines changed

7 files changed

+1614
-0
lines changed

components/scrapeless/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Overview
2+
3+
Scrapeless – your go-to platform for powerful, compliant web data extraction. With tools like Universal Scraping API, Scrapeless makes it easy to access and gather data from complex sites. Focus on insights while we handle the technical hurdles. Scrapeless – data extraction made simple.
4+
5+
# Example Use Cases
6+
7+
1. **Scraping API**: Endpoints for fresh, structured data from 100+ popular sites.
8+
2. **Universal Scraping API**: Access any website at scale and say goodbye to blocks.
9+
3. **Crawler**: Extract data from single pages or traverse entire domains.
10+
11+
# Getting Started
12+
13+
## Generating an API Key
14+
15+
1. If you are not a member of Scrapeless, you can sign up for a free account at [Scrapeless](https://app.scrapeless.com/passport/register).
16+
2. Once registered, you can go to the API Key Management page to generate an API Key in the app settings.
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
import scrapeless from "../../scrapeless.app.mjs";

export default {
  // Prefixed with the app slug for consistency with the sibling actions
  // ("scrapeless-scraping-api", "scrapeless-universal-scraping-api");
  // Pipedream component keys are conventionally "<app>-<action>".
  key: "scrapeless-crawler-api",
  name: "Crawler",
  description: "Crawl any website at scale and say goodbye to blocks.",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeless,
    apiServer: {
      type: "string",
      label: "Please select an API server",
      default: "crawl",
      options: [
        {
          label: "Crawl",
          value: "crawl",
        },
        {
          label: "Scrape",
          value: "scrape",
        },
      ],
      // Re-render the dynamic props below whenever the endpoint changes.
      reloadProps: true,
    },
  },
  /**
   * Dispatches to the Crawler "crawl" or "scrape" endpoint based on the
   * selected API server and returns the raw API response.
   *
   * @param {object} opts - Pipedream run context.
   * @param {object} opts.$ - Step execution context (used for summary export).
   * @returns {Promise<object>} The Scrapeless API response.
   * @throws {Error} If the selected API server is not a supported value.
   */
  async run({ $ }) {
    const { apiServer, ...rest } = this;

    if (apiServer === "crawl") {
      const submitData = {
        limit: rest.limitCrawlPages,
        url: rest.url,
      };
      const response = await this.scrapeless.crawlerCrawl({
        $,
        submitData,
        ...rest,
      });

      $.export("$summary", `Successfully retrieved crawling results for ${rest.url}`);
      return response;
    }

    if (apiServer === "scrape") {
      const submitData = {
        url: rest.url,
      };
      const response = await this.scrapeless.crawlerScrape({
        $,
        submitData,
        ...rest,
      });

      $.export("$summary", `Successfully retrieved scraping results for ${rest.url}`);
      return response;
    }

    // The options above are the only supported values; fail loudly instead of
    // silently returning undefined if an unexpected value slips through.
    throw new Error(`Unsupported API server: ${apiServer}`);
  },
  /**
   * Builds the dynamic props for the selected endpoint: both endpoints take a
   * URL; only "crawl" additionally takes a subpage limit.
   *
   * @returns {Promise<object>} Prop definitions keyed by prop name.
   */
  async additionalProps() {
    const { apiServer } = this;

    const props = {};

    if (apiServer === "crawl" || apiServer === "scrape") {
      props.url = {
        type: "string",
        label: "URL to Crawl",
        description: "If you want to crawl in batches, please refer to the SDK documentation",
      };
    }

    if (apiServer === "crawl") {
      props.limitCrawlPages = {
        type: "integer",
        label: "Number Of Subpages",
        default: 5,
        description: "Max number of results to return",
      };
    }

    return props;
  },
};
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
import scrapeless from "../../scrapeless.app.mjs";

export default {
  key: "scrapeless-scraping-api",
  name: "Scraping API",
  description: "Endpoints for fresh, structured data from 100+ popular sites.",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeless,
    apiServer: {
      type: "string",
      label: "Please select an API server",
      default: "googleSearch",
      options: [
        {
          label: "Google Search",
          value: "googleSearch",
        },
      ],
      // Re-render the dynamic props below whenever the endpoint changes.
      reloadProps: true,
    },
  },
  /**
   * Submits a scraping job for the selected actor (currently only
   * "scraper.google.search") and returns the raw API response.
   *
   * @param {object} opts - Pipedream run context.
   * @param {object} opts.$ - Step execution context (used for summary export).
   * @returns {Promise<object>} The Scrapeless API response.
   * @throws {Error} If the selected API server is not a supported value.
   */
  async run({ $ }) {
    const { apiServer, ...rest } = this;

    if (apiServer === "googleSearch") {
      const submitData = {
        actor: "scraper.google.search",
        input: {
          q: rest.q,
          hl: rest.hl,
          gl: rest.gl,
        },
      };
      const response = await this.scrapeless.scrapingApi({
        $,
        submitData,
        ...rest,
      });

      $.export("$summary", "Successfully retrieved scraping results for Google Search");
      return response;
    }

    // The options above are the only supported values; fail loudly instead of
    // silently returning undefined if an unexpected value slips through.
    throw new Error(`Unsupported API server: ${apiServer}`);
  },
  /**
   * Builds the dynamic props for the selected endpoint; Google Search takes a
   * query plus optional language/country codes.
   *
   * @returns {Promise<object>} Prop definitions keyed by prop name.
   */
  async additionalProps() {
    const { apiServer } = this;

    const props = {};

    if (apiServer === "googleSearch") {
      props.q = {
        type: "string",
        label: "Search Query",
        description: "Parameter defines the query you want to search. You can use anything that you would use in a regular Google search. e.g. inurl:, site:, intitle:. We also support advanced search query parameters such as as_dt and as_eq.",
        default: "coffee",
      };

      props.hl = {
        type: "string",
        label: "Language",
        description: "Parameter defines the language to use for the Google search. It's a two-letter language code. (e.g., en for English, es for Spanish, or fr for French).",
        default: "en",
      };

      props.gl = {
        type: "string",
        label: "Country",
        description: "Parameter defines the country to use for the Google search. It's a two-letter country code. (e.g., us for the United States, uk for United Kingdom, or fr for France).",
        default: "us",
      };
    }

    return props;
  },
};
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
import scrapeless from "../../scrapeless.app.mjs";
import { countryOptions } from "../../common/constants.mjs";

export default {
  key: "scrapeless-universal-scraping-api",
  name: "Universal Scraping API",
  description: "Access any website at scale and say goodbye to blocks.",
  version: "0.0.1",
  type: "action",
  props: {
    scrapeless,
    apiServer: {
      type: "string",
      label: "Please select an API server",
      default: "webUnlocker",
      options: [
        {
          label: "Web Unlocker",
          value: "webUnlocker",
        },
      ],
      // Re-render the dynamic props below whenever the endpoint changes.
      reloadProps: true,
    },
  },
  /**
   * Submits a job to the "unlocker.webunlocker" actor with the configured
   * rendering and proxy options and returns the raw API response.
   *
   * @param {object} opts - Pipedream run context.
   * @param {object} opts.$ - Step execution context (used for summary export).
   * @returns {Promise<object>} The Scrapeless API response.
   * @throws {Error} If the selected API server is not a supported value.
   */
  async run({ $ }) {
    const { apiServer, ...rest } = this;

    if (apiServer === "webUnlocker") {
      const submitData = {
        actor: "unlocker.webunlocker",
        input: {
          url: rest.url,
          jsRender: rest.jsRender,
          headless: rest.headless,
        },
        proxy: {
          country: rest.country,
        },
      };
      const response = await this.scrapeless.universalScrapingApi({
        $,
        submitData,
        ...rest,
      });

      $.export("$summary", "Successfully retrieved scraping results for Web Unlocker");
      return response;
    }

    // The options above are the only supported values; fail loudly instead of
    // silently returning undefined if an unexpected value slips through.
    throw new Error(`Unsupported API server: ${apiServer}`);
  },
  /**
   * Builds the dynamic props for the selected endpoint: target URL, rendering
   * toggles, and the proxy country to route the request through.
   *
   * @returns {Promise<object>} Prop definitions keyed by prop name.
   */
  async additionalProps() {
    const { apiServer } = this;

    const props = {};

    if (apiServer === "webUnlocker") {
      props.url = {
        type: "string",
        label: "Target URL",
        description: "Parameter defines the URL you want to scrape.",
      };

      props.jsRender = {
        type: "boolean",
        label: "Js Render",
        description: "Whether to enable JavaScript rendering for the request",
        default: true,
      };

      props.headless = {
        type: "boolean",
        label: "Headless",
        description: "Whether to run the browser in headless mode",
        default: true,
      };

      props.country = {
        type: "string",
        label: "Country",
        description: "Country to route the proxy through; `ANY` lets the service pick",
        default: "ANY",
        options: countryOptions.map((country) => ({
          label: country.label,
          value: country.value,
        })),
      };
    }

    return props;
  },
};

0 commit comments

Comments
 (0)