From a77c05bdee1dc944a1d3fb4bd466101c312c5a35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dawid=20Wysoki=C5=84ski?=
Date: Thu, 26 May 2022 07:48:13 +0200
Subject: [PATCH] feat: add endpoint to generate RSS feed

---
 go.mod  |   7 +++
 go.sum  |  11 ++++
 main.go | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 go.sum

diff --git a/go.mod b/go.mod
index 31c1334..5a9d666 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,10 @@
 module github.com/Kichiyaki/lubimyczytacrss
 
 go 1.18
+
+require github.com/PuerkitoBio/goquery v1.8.0
+
+require (
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..d5e55e1
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,11 @@
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8 h1:/6y1LfuqNuQdHAm0jjtPtgRcxIxjVZgm5OTu8/QhZvk=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
diff --git a/main.go b/main.go
index 764763a..28c31cf 100644
--- a/main.go
+++ b/main.go
@@ -2,17 +2,30 @@ package main
 
 import (
 	"context"
+	"encoding/xml"
+	"fmt"
+	"github.com/PuerkitoBio/goquery"
+	"io"
 	"log"
 	"net/http"
+	"net/url"
 	"os"
 	"os/signal"
+	"strings"
 	"time"
 )
 
+// lubimyCzytacDomain is the only host the handler agrees to fetch from;
+// defaultTimeout bounds each upstream request made by the handler.
+const (
+	lubimyCzytacDomain = "lubimyczytac.pl"
+	defaultTimeout     = 5 * time.Second
+)
+
 func main() {
 	httpSrv := &http.Server{
 		Addr:              ":9234",
-		Handler:           nil,
+		Handler:           newHandler(),
 		ReadTimeout:       2 * time.Second,
 		ReadHeaderTimeout: 2 * time.Second,
 		WriteTimeout:      2 * time.Second,
@@ -38,3 +51,141 @@ func main() {
 		log.Fatalln("httpSrv.Shutdown:", err)
 	}
 }
+
+// rssMain is the root <rss> element of the generated feed.
+type rssMain struct {
+	XMLName xml.Name   `xml:"rss"`
+	Version string     `xml:"version,attr"`
+	Channel rssChannel `xml:"channel"`
+}
+
+// rssChannel is the <channel> element; every entry of Items is written as
+// its own <item> child.
+type rssChannel struct {
+	XMLName     xml.Name  `xml:"channel"`
+	Link        string    `xml:"link"`
+	Description string    `xml:"description"`
+	Language    string    `xml:"language"`
+	Items       []rssItem `xml:"item"`
+}
+
+// rssItem is a single <item> entry describing one book.
+type rssItem struct {
+	XMLName     xml.Name `xml:"item"`
+	Title       string   `xml:"title"`
+	Link        string   `xml:"link"`
+	GUID        string   `xml:"guid"`
+	Description string   `xml:"description"`
+}
+
+// newHandler returns the HTTP handler that turns a lubimyczytac.pl author
+// page into an RSS 2.0 feed. The request path, minus its leading slash, is
+// the absolute URL of the page to fetch; only GET requests for http(s) URLs
+// on lubimyCzytacDomain are accepted.
+func newHandler() http.Handler {
+	client := &http.Client{
+		Timeout: defaultTimeout,
+	}
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodGet {
+			w.WriteHeader(http.StatusMethodNotAllowed)
+			return
+		}
+
+		// TrimPrefix rather than Path[1:]: the path can be empty (an
+		// absolute-form request target without a path), and slicing an
+		// empty string would panic.
+		target := strings.TrimPrefix(r.URL.Path, "/")
+		req, err := http.NewRequestWithContext(r.Context(), http.MethodGet, target, nil)
+		if err != nil || req.URL.Host != lubimyCzytacDomain {
+			w.WriteHeader(http.StatusBadRequest)
+			return
+		}
+		// Reject schemes the HTTP client cannot fetch as a client error
+		// instead of letting client.Do fail with a 500.
+		if req.URL.Scheme != "http" && req.URL.Scheme != "https" {
+			w.WriteHeader(http.StatusBadRequest)
+			return
+		}
+
+		resp, err := client.Do(req)
+		if err != nil {
+			w.WriteHeader(http.StatusInternalServerError)
+			return
+		}
+		defer func() {
+			_ = resp.Body.Close()
+		}()
+
+		// Do not parse upstream error responses as author pages; report
+		// them instead of answering 200 OK.
+		if resp.StatusCode != http.StatusOK {
+			_, _ = io.Copy(io.Discard, resp.Body)
+			w.WriteHeader(http.StatusBadGateway)
+			return
+		}
+
+		p, err := newAuthorPageParser(resp.Body)
+		if err != nil {
+			_, _ = io.Copy(io.Discard, resp.Body)
+			w.WriteHeader(http.StatusBadRequest)
+			return
+		}
+
+		w.Header().Set("Content-Type", "text/xml; charset=utf-8")
+		w.WriteHeader(http.StatusOK)
+		if err := xml.NewEncoder(w).Encode(rssMain{
+			Version: "2.0",
+			Channel: rssChannel{
+				Link:  req.URL.String(),
+				Items: p.items(req.URL),
+			},
+		}); err != nil {
+			// The status line has already been sent, so a second
+			// WriteHeader would be ignored; log the failure instead.
+			log.Println("xml.Encode:", err)
+		}
+	})
+}
+
+// authorPageParser extracts book entries from a parsed author page.
+type authorPageParser struct {
+	doc *goquery.Document
+}
+
+// newAuthorPageParser parses the HTML read from r. It returns a wrapped
+// error when goquery cannot build a document from the input.
+func newAuthorPageParser(r io.Reader) (authorPageParser, error) {
+	doc, err := goquery.NewDocumentFromReader(r)
+	if err != nil {
+		return authorPageParser{}, fmt.Errorf("goquery.NewDocumentFromReader: %w", err)
+	}
+	return authorPageParser{
+		doc: doc,
+	}, nil
+}
+
+// items returns one rssItem per "#authorBooks .authorAllBooks__single"
+// element. Each link combines baseURL's scheme and host with the href of
+// the book's title element and is also used as the item's GUID.
+func (p authorPageParser) items(baseURL *url.URL) []rssItem {
+	booksSel := p.doc.Find("#authorBooks .authorAllBooks__single")
+	items := make([]rssItem, booksSel.Length())
+	booksSel.Each(func(i int, selection *goquery.Selection) {
+		// Look the title element up once; it supplies both href and text.
+		titleSel := selection.Find(".authorAllBooks__singleTextTitle")
+		link := url.URL{
+			Scheme: baseURL.Scheme,
+			Host:   baseURL.Host,
+			Path:   titleSel.AttrOr("href", ""),
+		}
+		linkStr := link.String()
+		items[i] = rssItem{
+			Title:       strings.TrimSpace(titleSel.Text()),
+			Link:        linkStr,
+			GUID:        linkStr,
+			Description: "",
+		}
+	})
+	return items
+}