NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy. Confirm context-line whitespace (and the placement of blank
context lines) against the repository before applying. The "index" hashes of the three
new .cs files predate the documentation and generic-type fix applied to their content.

diff --git a/src/Plugins/Totletheyn/AutomateExtension.cs b/src/Plugins/Totletheyn/AutomateExtension.cs
index db822ca..16c3075 100644
--- a/src/Plugins/Totletheyn/AutomateExtension.cs
+++ b/src/Plugins/Totletheyn/AutomateExtension.cs
@@ -14,6 +14,7 @@ public class AutomateExtension : MossExtension
     public static ulong Register()
     {
         TaskScheduler.Activator.Register("rss");
+        TaskScheduler.Activator.Register("crawler");
 
         Init();
 
diff --git a/src/Plugins/Totletheyn/Core/ICrawler.cs b/src/Plugins/Totletheyn/Core/ICrawler.cs
new file mode 100644
index 0000000..4d26bcf
--- /dev/null
+++ b/src/Plugins/Totletheyn/Core/ICrawler.cs
@@ -0,0 +1,15 @@
+using System.Collections.Generic;
+
+namespace Totletheyn.Core;
+
+/// <summary>
+/// Contract for a crawler that can report and enumerate new <see cref="Issue"/> entries.
+/// </summary>
+public interface ICrawler
+{
+    /// <summary>Reports whether a new issue is available; what counts as "new" is up to the implementation.</summary>
+    bool IsNewIssueAvailable();
+
+    /// <summary>Returns the new issues as a typed sequence.</summary>
+    IEnumerable<Issue> GetNewIssues();
+}
\ No newline at end of file
diff --git a/src/Plugins/Totletheyn/Core/Issue.cs b/src/Plugins/Totletheyn/Core/Issue.cs
new file mode 100644
index 0000000..f9382e8
--- /dev/null
+++ b/src/Plugins/Totletheyn/Core/Issue.cs
@@ -0,0 +1,5 @@
+namespace Totletheyn.Core;
+
+// NOTE(review): pdfUrl is not PascalCase like Title; renaming it would change the generated property name.
+/// <summary>An issue described by a title and a PDF URL.</summary>
+public record Issue(string Title, string pdfUrl);
\ No newline at end of file
diff --git a/src/Plugins/Totletheyn/Jobs/CrawlerJob.cs b/src/Plugins/Totletheyn/Jobs/CrawlerJob.cs
new file mode 100644
index 0000000..f9f172c
--- /dev/null
+++ b/src/Plugins/Totletheyn/Jobs/CrawlerJob.cs
@@ -0,0 +1,15 @@
+using Moss.NET.Sdk.Scheduler;
+using Totletheyn.Core;
+
+namespace Totletheyn.Jobs;
+
+/// <summary>
+/// Job for the crawler feature. <see cref="Run"/> is still an empty placeholder.
+/// </summary>
+public class CrawlerJob : Job
+{
+    public override void Run()
+    {
+        // TODO: crawling is not implemented yet; the job currently does nothing.
+    }
+}
\ No newline at end of file
diff --git a/src/Plugins/Totletheyn/crawler-infos.txt b/src/Plugins/Totletheyn/crawler-infos.txt
new file mode 100644
index 0000000..89989b7
--- /dev/null
+++ b/src/Plugins/Totletheyn/crawler-infos.txt
@@ -0,0 +1,16 @@
+crawler:
+    frauenhofer(https://www.fraunhofer.de/de/mediathek/publikationen/fraunhofer-magazin.html):
+        title: document.querySelectorAll("h3.teaser-default__text-headline")[*].innerText
+        pdf: document.querySelectorAll(".file-pdf > a")[*].href;
+
+    mpf(https://www.mpg.de/maxplanckforschung):
+        title: document.querySelectorAll("h3")[*].innerText
+        pdf: document.querySelectorAll(".mpg-icon-pdf")[0].href
+
+    hello world(https://www.raspberrypi.org/hello-world):
+        title: document.querySelectorAll("h3")[*].innerText
+        pdf: document.querySelectorAll(".pk-c-detailed-link-card__link")[*].href -> every 4th is the pdf
+
+    paged out (https://pagedout.institute/rss.xml): with rss feed:
+        Paged Out! #6
+        https://pagedout.institute/download/PagedOut_006.pdf
diff --git a/src/Plugins/Totletheyn/plugin.conf b/src/Plugins/Totletheyn/plugin.conf
index 3cea6bb..5ec5cd6 100644
--- a/src/Plugins/Totletheyn/plugin.conf
+++ b/src/Plugins/Totletheyn/plugin.conf
@@ -13,6 +13,19 @@
                 ]
             }
         }
+
+        crawler {
+            class = "crawler"
+            mode = only new
+            options {
+                providers: [
+                    "helloworld",
+                    "pagedout",
+                    "frauenhofer",
+                    "mpf"
+                ]
+            }
+        }
     }
 }
 