package main import ( "bytes" "context" _ "embed" "encoding/json" "fmt" "genBrief/db" "genBrief/util" "strings" "github.com/PuerkitoBio/goquery" openai "github.com/sashabaranov/go-openai" ) //go:embed dom.json var domJson string var domMap map[string]string func init() { domMap = map[string]string{} err := json.Unmarshal([]byte(domJson), &domMap) if err != nil { panic(err) } db.Init() } func main() { loopBrief() loopPicture() } func loopBrief() { list, err := db.New("t_news").Attr("id,content,url").Where("status", "Brief").GetAll() fmt.Println("loopBrief", len(list), err) for _, record := range list { update := map[string]interface{}{} id := record["id"].(int64) var content, url, brief string if val, flag := record["content"].(string); flag { content = val } if val, flag := record["url"].(string); flag { url = val } if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } else { res, err := util.GetHtml(url) if err != nil { fmt.Println("get Html false", err.Error()) } else { content = getContent(res, "body") update["content"] = content if len(content) >= 1024 { brief = genBrief(content) update["brief"] = brief } } } // if brief == "" { update["status"] = "Delete" } else { update["status"] = "Picture" tags := genTag(brief) if tags != "" { alltag := do_insert_tag(id, tags) update["tag"] = alltag update["pics"] = get_pics(alltag) update["status"] = "Publish" } } fmt.Println("finishBrief", id, brief) if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil { fmt.Println("save", err.Error()) } } } func loopPicture() { list, _ := db.New("t_news").Attr("id,tag,brief").Where("status", "Picture").GetAll() for _, record := range list { update := map[string]interface{}{} id := record["id"].(int64) var brief, tags string if val, flag := record["brief"].(string); flag { brief = val } if val, flag := record["tag"].(string); flag { tags = val } if tags == "" { tags = genTag(brief) if tags != "" { alltag := do_insert_tag(id, tags) update["tag"] = alltag update["pics"] = get_pics(alltag) update["status"] = "Publish" } else { update["status"] = "Delete" } } else { update["pics"] = get_pics(tags) update["status"] = "Publish" } fmt.Println("finishPicture", id, tags) if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil { fmt.Println("save", err.Error()) } } } func genBrief(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func genTag(content string) string { client := openai.NewClient("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") resp, err := client.CreateChatCompletion( context.Background(), openai.ChatCompletionRequest{ Model: openai.GPT3Dot5Turbo, Messages: []openai.ChatCompletionMessage{ { Role: openai.ChatMessageRoleUser, Content: content + "\r\n以上新闻内容属于哪一类 A居民 B商业 C金融 D建筑 E屋内装饰 F其它\r\n仅可选一项或者两项", }, }, }, ) if err != nil { fmt.Printf("ChatCompletion error: %v\n", err) return "" } return resp.Choices[0].Message.Content } func get_pics(tags string) string { tag := strings.Split(tags, ",")[0] if tag == "" || tag == "other" { tag = "gpt" } var url string err := db.New("t_news_img").Attr("url").Where("tag", tag).Order(" ORDER BY RAND()").GetRow().Scan(&url) if err != nil { fmt.Println("get_pics err", tag, err.Error()) } return url } func do_insert_tag(id int64, tags string) string { mtag := map[string]string{ "A": "residential", "B": "commercial", "C": "financial", "D": "construction", "E": "indoor", "F": "other", } mtags := []string{} for opt, tag := range mtag { if strings.Contains(tags, opt) { record := map[string]interface{}{ "new_id": id, "tag": tag, } db.Pool().Insert("t_news_tag", record) mtags = append(mtags, tag) } } return strings.Join(mtags, ",") } func getContent(body []byte, dom string) string { doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body)) doc.Find("script").Remove() doc.Find("noscript").Remove() if dom == "" { dom = "boby" } br := doc.Find(dom).Text() if br == "" { br = doc.Find("body").Text() } return util.TrimHtml(br) }