diff --git a/CODEOWNERS b/CODEOWNERS
index e45c38c..daca257 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -53,6 +53,11 @@
 
 /room.go @jiuxia211 @SchwarzSail
 
+## /notice.go
+# 2024-朱胤帆 <1424928981@qq.com> (@SchwarzSail)
+# 2024-任宝硕 (@renbaoshuo)
+
+/notice.go @SchwarzSail @renbaoshuo
 
 ## /user.go
 # 2022-林黄骁 (@ozline)
diff --git a/jwch_test.go b/jwch_test.go
index 7b6b3ff..08b1438 100644
--- a/jwch_test.go
+++ b/jwch_test.go
@@ -290,6 +290,32 @@ func TestGetNoticesInfo(t *testing.T) {
 	}
 }
 
+func TestGetNoticeDetail(t *testing.T) {
+	// Fetch the notice list first; stop on failure because noticeList[0] is indexed below.
+	noticeList, _, err := stu.GetNoticeInfo(&NoticeInfoReq{PageNum: 1})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(noticeList) == 0 {
+		t.Fatal("notice list is empty")
+	}
+
+	// Fetch the detail of the first notice.
+	detail, err := stu.GetNoticeDetail(&NoticeDetailReq{
+		WbTreeId: noticeList[0].WbTreeId,
+		WbNewsId: noticeList[0].WbNewsId,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	if detail == nil {
+		t.Fatal("notice detail is nil")
+	}
+	fmt.Println("Title:", detail.Title)
+	fmt.Println("Date:", detail.Date)
+	fmt.Println("Content:", detail.Content)
+}
+
 func TestGetCultivatePlan(t *testing.T) {
 	url, err := stu.GetCultivatePlan()
 	if err != nil {
diff --git a/model.go b/model.go
index 1ab3b72..67b1a20 100644
--- a/model.go
+++ b/model.go
@@ -203,15 +203,30 @@ type ExamRoomInfo struct {
 }
 
 type NoticeInfo struct {
-	Title string // 通知标题
-	URL   string // 通知链接
-	Date  string // 通知日期
+	Title    string // Notice title
+	URL      string // Notice link
+	Date     string // Notice date
+	WbTreeId string // Department ID, with the labels used in the original comment (1035: 综合科; 1036: 教学类型; 1037: 教研教改; 1038: 计划科; 1139: 实践科; 1140: 质量办; 1141: 电教中心; 1142: 教材中心; 1143: 铜盘校区管理科)
+	WbNewsId string // News ID
 }
 
 type NoticeInfoReq struct {
 	PageNum int // 获取第几页的数据,从 1 开始
 }
 
+// NoticeDetailReq identifies the notice to fetch with GetNoticeDetail.
+type NoticeDetailReq struct {
+	WbTreeId string // Department ID, with the labels used in the original comment (1035: 综合科; 1036: 教学类型; 1037: 教研教改; 1038: 计划科; 1139: 实践科; 1140: 质量办; 1141: 电教中心; 1142: 教材中心; 1143: 铜盘校区管理科)
+	WbNewsId string // News ID
+}
+
+// NoticeDetail is a notice's NoticeInfo fields plus its body text.
+type NoticeDetail struct {
+	NoticeInfo
+
+	Content string // Notice body text
+}
+
 // LocateDate 当前时间
 type LocateDate struct {
 	Week string
diff --git a/notice.go b/notice.go
index 10a0756..c471c2a 100644
--- a/notice.go
+++ b/notice.go
@@ -18,6 +18,7 @@ package jwch
 
 import (
 	"fmt"
+	"net/url"
 	"regexp"
 	"strings"
 
@@ -106,15 +107,17 @@ func parseNoticeInfo(doc *html.Node) ([]*NoticeInfo, error) {
 		title := strings.TrimSpace(htmlquery.SelectAttr(titleNode, "title"))
 
 		// 提取 URL
-		url := strings.TrimSpace(htmlquery.SelectAttr(titleNode, "href"))
-		url = constants.JwchNoticeURLPrefix + url
+		rawURL := strings.TrimSpace(htmlquery.SelectAttr(titleNode, "href"))
+		rawURL = constants.JwchNoticeURLPrefix + rawURL
 
-		url = convertURL(url)
+		convertedURL, wbTreeId, wbNewsId := convertURL(rawURL)
 
 		noticeInfo := &NoticeInfo{
-			Title: title,
-			URL:   url,
-			Date:  date,
+			Title:    title,
+			URL:      convertedURL,
+			Date:     date,
+			WbTreeId: wbTreeId,
+			WbNewsId: wbNewsId,
 		}
 		list = append(list, noticeInfo)
 	}
@@ -138,30 +141,116 @@ func getTotalPages(doc *html.Node) (int, error) {
 	return totalPages, nil
 }
 
-// 转换函数
-/*
-将
-https://jwch.fzu.edu.cn/../info/1040/13769.htm
-https://jwch.fzu.edu.cn/info/1038/14038.htm
-https://jwch.fzu.edu.cn/../content.jsp?urltype=news.NewsContentUrl&wbtreeid=1039&wbnewsid=14075
-改成
-https://jwch.fzu.edu.cn/content.jsp?urltype=news.NewsContentUrl&wbtreeid=1040&wbnewsid=13768
-*/
-func convertURL(original string) string {
+// noticeInfoPathRe matches the "info/<wbtreeid>/<wbnewsid>.htm" path form of a
+// notice link. It is compiled once at package scope instead of on every
+// convertURL call.
+var noticeInfoPathRe = regexp.MustCompile(`info/(\d+)/(\d+)\.htm`)
+
+// convertURL rewrites a notice-list link into content.jsp form and extracts its
+// wbtreeid and wbnewsid.
+//
+// For example, each of
+//   - https://jwch.fzu.edu.cn/../info/1040/13769.htm
+//   - https://jwch.fzu.edu.cn/info/1040/13769.htm
+//   - https://jwch.fzu.edu.cn/../content.jsp?urltype=news.NewsContentUrl&wbtreeid=1040&wbnewsid=13769
+//
+// becomes
+//   - https://jwch.fzu.edu.cn/content.jsp?urltype=news.NewsContentUrl&wbtreeid=1040&wbnewsid=13769
+//
+// Returns:
+//   - finalURL: the content.jsp URL, or the input with "../" removed when neither form matches
+//   - wbTreeId: empty when neither form matches
+//   - wbNewsId: empty when neither form matches
+func convertURL(original string) (string, string, string) {
 	// 去除 "../"
 	cleaned := strings.ReplaceAll(original, "../", "")
 
-	// 正则提取 wbtreeid 和 wbnewsid
-	re := regexp.MustCompile(`info/(\d+)/(\d+)\.htm`)
-	matches := re.FindStringSubmatch(cleaned)
-	if len(matches) != 3 {
-		return cleaned
+	// Path form (info/TREE/NEWS.htm): take wbtreeid and wbnewsid from the path.
+	matches := noticeInfoPathRe.FindStringSubmatch(cleaned)
+	if len(matches) == 3 {
+		wbtreeid := matches[1]
+		wbnewsid := matches[2]
+		newURL := fmt.Sprintf("https://jwch.fzu.edu.cn/content.jsp?urltype=news.NewsContentUrl&wbtreeid=%s&wbnewsid=%s", wbtreeid, wbnewsid)
+		return newURL, wbtreeid, wbnewsid
+	}
+
+	// Already in content.jsp form: read wbtreeid and wbnewsid from the query string.
+	parsed, err := url.Parse(cleaned)
+	if err == nil {
+		q := parsed.Query()
+		wbtreeid := q.Get("wbtreeid")
+		wbnewsid := q.Get("wbnewsid")
+		if wbtreeid != "" && wbnewsid != "" {
+			return cleaned, wbtreeid, wbnewsid
+		}
+	}
+
+	return cleaned, "", ""
+}
+
+// GetNoticeDetail fetches the content.jsp page of one notice and extracts its
+// title, publish date and body text.
+//
+// req must be non-nil with both WbTreeId and WbNewsId set; otherwise an error is
+// returned without issuing a request. An error is also returned when the request
+// fails, the HTML cannot be parsed, or an expected page element is missing.
+func (s *Student) GetNoticeDetail(req *NoticeDetailReq) (*NoticeDetail, error) {
+	if req == nil {
+		return nil, fmt.Errorf("GetNoticeDetail: req is nil")
+	}
+	if req.WbTreeId == "" || req.WbNewsId == "" {
+		return nil, fmt.Errorf("GetNoticeDetail: WbTreeId and WbNewsId are required")
+	}
+
+	// Escape the IDs so a caller-supplied value cannot add or alter query parameters.
+	targetURL := fmt.Sprintf("https://jwch.fzu.edu.cn/content.jsp?urltype=news.NewsContentUrl&wbtreeid=%s&wbnewsid=%s", url.QueryEscape(req.WbTreeId), url.QueryEscape(req.WbNewsId))
+
+	res, err := s.NewRequest().
+		SetHeader("User-Agent", constants.UserAgent).
+		Get(targetURL)
+	if err != nil {
+		return nil, fmt.Errorf("GetNoticeDetail: failed to fetch url %s: %w", targetURL, err)
+	}
+
+	doc, err := htmlquery.Parse(strings.NewReader(string(res.Body())))
+	if err != nil {
+		return nil, fmt.Errorf("GetNoticeDetail: failed to parse html: %w", err)
+	}
+
+	// Main container of the notice page.
+	mainNode := htmlquery.FindOne(doc, "//div[contains(@class,'xl_main')]")
+	if mainNode == nil {
+		return nil, fmt.Errorf("GetNoticeDetail: .xl_main not found, url=%s", targetURL)
+	}
+
+	// Title.
+	titleNode := htmlquery.FindOne(mainNode, ".//*[contains(@class,'xl_tit')]/h4")
+	if titleNode == nil {
+		return nil, fmt.Errorf("GetNoticeDetail: .xl_tit h4 not found, url=%s", targetURL)
 	}
+	title := strings.TrimSpace(htmlquery.InnerText(titleNode))
 
-	wbtreeid := matches[1]
-	wbnewsid := matches[2]
+	// Publish time: first span under .xl_sj, with the "发布时间:" label and surrounding whitespace removed.
+	timeNode := htmlquery.FindOne(mainNode, ".//*[contains(@class,'xl_sj')]//span[1]")
+	if timeNode == nil {
+		return nil, fmt.Errorf("GetNoticeDetail: .xl_sj span not found, url=%s", targetURL)
+	}
+	date := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(htmlquery.InnerText(timeNode)), "发布时间:"))
+
+	// Body text.
+	contentNode := htmlquery.FindOne(mainNode, ".//*[@id='vsb_content']")
+	if contentNode == nil {
+		return nil, fmt.Errorf("GetNoticeDetail: #vsb_content not found, url=%s", targetURL)
+	}
 
-	// 构造新的 URL
-	newURL := fmt.Sprintf("https://jwch.fzu.edu.cn/content.jsp?urltype=news.NewsContentUrl&wbtreeid=%s&wbnewsid=%s", wbtreeid, wbnewsid)
-	return newURL
+	return &NoticeDetail{
+		NoticeInfo: NoticeInfo{
+			Title:    title,
+			Date:     date,
+			URL:      targetURL,
+			WbTreeId: req.WbTreeId,
+			WbNewsId: req.WbNewsId,
+		},
+		Content: htmlquery.InnerText(contentNode),
+	}, nil
 }