package scraper import ( "fmt" "net/url" "strconv" "strings" "sync" "time" "twdcbot/tribalwars" "github.com/PuerkitoBio/goquery" "github.com/gocolly/colly" ) const ( pathEnnoblementsLive = "/%s/index.php?page=ennoblements&live=live" ) type Conquer struct { Village string VillageID int NewOwnerID int NewOwnerName string NewOwnerTribeID int NewOwnerTribeName string OldOwnerID int OldOwnerName string OldOwnerTribeID int OldOwnerTribeName string ConqueredAt time.Time } type Scraper struct { worlds []string since time.Time collector *colly.Collector mutex sync.Mutex result map[string][]*Conquer } func New(worlds []string, since time.Time) *Scraper { s := &Scraper{ since: since, worlds: worlds, collector: colly.NewCollector( colly.Async(true), ), } s.collector.Limit(&colly.LimitRule{ RandomDelay: time.Second, DomainGlob: "*", Parallelism: 5, }) return s } func (s *Scraper) getIDFromNodeHref(node *goquery.Selection) int { if node != nil { nodeHref, ok := node.Attr("href") if ok { u, err := url.Parse(nodeHref) if err == nil { if idStr := u.Query().Get("id"); idStr != "" { id, err := strconv.Atoi(idStr) if err == nil { return id } } } } } return 0 } func (s *Scraper) handleHTML(row *colly.HTMLElement) { world := strings.Split(row.Request.URL.Path, "/")[1] var err error c := &Conquer{} conqueredAtString := strings.TrimSpace(row.DOM.Find("td:last-child").Text()) location := Locations[tribalwars.LanguageCodeFromWorldName(world)] c.ConqueredAt, err = time.ParseInLocation("2006-01-02 - 15:04:05", conqueredAtString, location) if err != nil || c.ConqueredAt.Before(s.since.In(location)) { return } villageAnchor := row.DOM.Find("a:first-child").First() c.VillageID = s.getIDFromNodeHref(villageAnchor) c.Village = strings.TrimSpace(villageAnchor.Text()) oldOwnerNode := row.DOM.Find("td:nth-child(3) a:first-child") if len(oldOwnerNode.Nodes) == 0 { c.OldOwnerName = "-" c.OldOwnerTribeName = "-" } else { c.OldOwnerID = s.getIDFromNodeHref(oldOwnerNode) c.OldOwnerName = strings.TrimSpace(oldOwnerNode.Text()) oldOwnerTribeNode := row.DOM.Find("td:nth-child(3) .tribelink") if len(oldOwnerTribeNode.Nodes) != 0 { c.OldOwnerTribeName = strings.TrimSpace(oldOwnerTribeNode.Text()) c.OldOwnerTribeID = s.getIDFromNodeHref(oldOwnerTribeNode) } else { c.OldOwnerTribeName = "-" } } newOwnerNode := row.DOM.Find("td:nth-child(4) a:first-child") c.NewOwnerID = s.getIDFromNodeHref(newOwnerNode) c.NewOwnerName = strings.TrimSpace(newOwnerNode.Text()) newOwnerTribeNode := row.DOM.Find("td:nth-child(4) .tribelink") if len(newOwnerTribeNode.Nodes) != 0 { c.NewOwnerTribeID = s.getIDFromNodeHref(newOwnerTribeNode) c.NewOwnerTribeName = strings.TrimSpace(newOwnerTribeNode.Text()) } else { c.NewOwnerTribeName = "-" } s.mutex.Lock() s.result[world] = append(s.result[world], c) s.mutex.Unlock() } func (s *Scraper) Scrap() map[string][]*Conquer { s.result = make(map[string][]*Conquer) s.collector.OnHTML(".r1", s.handleHTML) s.collector.OnHTML(".r2", s.handleHTML) for _, world := range s.worlds { url := TwstatsURLs[tribalwars.LanguageCodeFromWorldName(world)] if url != "" { s.collector.Visit(fmt.Sprintf(url+pathEnnoblementsLive, world)) } } s.collector.Wait() return s.result }