diff --git a/plugin/moyu_calendar/calendar.go b/plugin/moyu_calendar/calendar.go index 867e2abfbe..883581581a 100644 --- a/plugin/moyu_calendar/calendar.go +++ b/plugin/moyu_calendar/calendar.go @@ -12,15 +12,20 @@ import ( "strings" "time" - "github.com/FloatTech/zbputils/binary" control "github.com/FloatTech/zbputils/control" + "github.com/FloatTech/zbputils/control/order" "github.com/FloatTech/zbputils/process" zero "github.com/wdvxdr1123/ZeroBot" "github.com/wdvxdr1123/ZeroBot/message" - "github.com/FloatTech/zbputils/control/order" + xpath "github.com/antchfx/htmlquery" ) +var ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36" +var weixin = regexp.MustCompile(`url \+= '(.+)';`) + +var client = &http.Client{} + func init() { control.Register("moyucalendar", order.AcquirePrio(), &control.Options{ DisableOnDefault: true, @@ -29,20 +34,41 @@ func init() { "- /禁用 moyucalendar", }).OnFullMatch("摸鱼人日历").SetBlock(true). Handle(func(ctx *zero.Ctx) { - image, err := crew() + title := fmt.Sprintf("摸鱼人日历 %d月%d日", time.Now().Month(), time.Now().Day()) + sg, cookies, err := sougou(title, "摸鱼人日历", ua) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) + return + } + wx, err := redirect(sg, cookies, ua) + if err != nil { + ctx.SendChain(message.Text("ERROR: ", err)) + return + } + image, err := calendar(wx, ua) if err != nil { ctx.SendChain(message.Text("ERROR: ", err)) + return } ctx.SendChain(message.Image(image)) }) - // 定时任务每天8点执行一次 + // 定时任务每天8点30分执行一次 _, err := process.CronTab.AddFunc("30 8 * * *", func() { m, ok := control.Lookup("moyucalendar") if !ok { return } - image, err := crew() + title := fmt.Sprintf("摸鱼人日历 %d月%d日", time.Now().Month(), time.Now().Day()) + sg, cookies, err := sougou(title, "摸鱼人日历", ua) + if err != nil { + return + } + wx, err := redirect(sg, cookies, ua) + if err != nil { + return + } + image, err := calendar(wx, ua) if err != nil { return } @@ -62,63 +88,83 @@ func init() { } } -var newest = regexp.MustCompile(`href="(/link.+?)" id="sogou_vr_11002601_title_0" uigs="article_title_0"`) -var weixin = regexp.MustCompile(`url \+= '(.+)';`) -var calendar = regexp.MustCompile(`data-src="(.{0,300})" data-type="png" data-w="540"`) - -func crew() (string, error) { - client := &http.Client{} +func sougou(title, publisher, ua string) (string, []*http.Cookie, error) { u, _ := url.Parse("https://weixin.sogou.com/weixin") u.RawQuery = url.Values{ "type": []string{"2"}, "s_from": []string{"input"}, - "query": []string{fmt.Sprintf("摸鱼人日历 %d月%d日", time.Now().Month(), time.Now().Day())}, + "query": []string{title}, }.Encode() req, err := http.NewRequest("GET", u.String(), nil) if err != nil { - return "", err + return "", nil, err } - req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36") + req.Header.Set("User-Agent", ua) resp, err := client.Do(req) if err != nil { - return "", err + return "", nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return "", errors.New("status not ok") + return "", nil, errors.New("status not ok") } - b, err := io.ReadAll(resp.Body) + // 解析XPATH + doc, err := xpath.Parse(resp.Body) if err != nil { - return "", err + return "", nil, err } - match := newest.FindStringSubmatch(binary.BytesToString(b)) - if len(match) < 2 { - return "", errors.New("newest not found") + // 取出每个返回的结果 + list := xpath.Find(doc, `//*[@class="news-list"]/li/div[@class="txt-box"]`) + if len(list) == 0 { + return "", nil, errors.New("sougou result not found") } - var link = "https://weixin.sogou.com" + match[1] - reqa, err := http.NewRequest("GET", link, nil) + var match string + for i := range list { + account := xpath.FindOne(list[i], `//div[@class="s-p"]/a[@class="account"]`) + if account == nil { + continue + } + if xpath.InnerText(account) != publisher { + continue + } + target := xpath.FindOne(list[i], `//h3/a[@target="_blank"]`) + if target == nil { + continue + } + match = xpath.SelectAttr(target, "href") + break + } + if match == "" { + return "", nil, errors.New("sougou result not found") + } + return "https://weixin.sogou.com" + match, resp.Cookies(), nil +} + +func redirect(link string, cookies []*http.Cookie, ua string) (string, error) { + req, err := http.NewRequest("GET", link, nil) if err != nil { return "", err } - reqa.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36") - var cookies = make([]string, 0, 4) - for _, cookie := range resp.Cookies() { - if cookie.Name != "ABTEST" && cookie.Name != "SNUID" && cookie.Name != "IPLOC" && cookie.Name != "SUID" { + req.Header.Set("User-Agent", ua) + var c = make([]string, 0, 4) + for _, cookie := range cookies { + if cookie.Name != "ABTEST" && cookie.Name != "SNUID" && + cookie.Name != "IPLOC" && cookie.Name != "SUID" { continue } - cookies = append(cookies, cookie.Name+"="+cookie.Value) + c = append(c, cookie.Name+"="+cookie.Value) } - reqa.Header.Set("Cookie", strings.Join(cookies, "; ")) - respa, err := client.Do(reqa) + req.Header.Set("Cookie", strings.Join(c, "; ")) + resp, err := client.Do(req) if err != nil { return "", err } - defer respa.Body.Close() - if respa.StatusCode != http.StatusOK { + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { return "", errors.New("status not ok") } - br := bufio.NewReader(respa.Body) - var weixinurl = make([]string, 0) + br := bufio.NewReader(resp.Body) + var u = make([]string, 0) for { b, _, err := br.ReadLine() if err == io.EOF { @@ -127,39 +173,54 @@ func crew() (string, error) { if err != nil { return "", err } - matcha := weixin.FindStringSubmatch(binary.BytesToString(b)) + matcha := weixin.FindStringSubmatch(string(b)) if len(matcha) < 2 { continue } - weixinurl = append(weixinurl, strings.ReplaceAll(matcha[1], "@", "")) + u = append(u, strings.ReplaceAll(matcha[1], "@", "")) } - if len(weixinurl) == 0 { + if len(u) == 0 { return "", errors.New("weixin url not found") } - reqw, err := http.NewRequest("GET", strings.Join(weixinurl, ""), nil) - reqa.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36") + return strings.Join(u, ""), nil +} + +func calendar(link, ua string) (string, error) { + req, err := http.NewRequest("GET", link, nil) + req.Header.Set("User-Agent", ua) if err != nil { return "", err } - respw, err := client.Do(reqw) + resp, err := client.Do(req) if err != nil { return "", err } - defer respw.Body.Close() - if respw.StatusCode != http.StatusOK { + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { return "", errors.New("status not ok") } - bw, _ := io.ReadAll(respw.Body) - today, err := regexp.Compile(time.Now().Format("2006-01-02")) + doc, err := xpath.Parse(resp.Body) if err != nil { return "", err } - if !today.Match(bw) { - return "", errors.New("today not found") + html := xpath.OutputHTML(doc, false) + if !strings.Contains(html, time.Now().Format("2006-01-02")) { + return "", errors.New("calendar not today") } - matchw := calendar.FindStringSubmatch(binary.BytesToString(bw)) - if len(matchw) < 2 { + images := xpath.Find(doc, `//*[@id="js_content"]/p/img`) + if images == nil { return "", errors.New("calendar not found") } - return matchw[1], nil + var image string + for i := range images { + if xpath.SelectAttr(images[i], "data-w") != "540" { + continue + } + image = xpath.SelectAttr(images[i], "data-src") + break + } + if image == "" { + return "", errors.New("image not found") + } + return image, nil }