123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- package main
import (
	"bytes"
	"context"
	_ "embed"
	"encoding/json"
	"fmt"
	"os"
	"strings"

	"github.com/PuerkitoBio/goquery"
	openai "github.com/sashabaranov/go-openai"

	"genBrief/db"
	"genBrief/util"
)
// domJson holds the raw text of dom.json, embedded into the binary at build
// time by the go:embed directive below.
//
//go:embed dom.json
var domJson string

// domMap is the decoded form of domJson: init unmarshals the embedded JSON
// object into it as string keys and string values.
// NOTE(review): nothing in this part of the file reads domMap; presumably it
// maps a site to a DOM selector for getContent — confirm against callers.
var domMap map[string]string
- func init() {
- domMap = map[string]string{}
- err := json.Unmarshal([]byte(domJson), &domMap)
- if err != nil {
- panic(err)
- }
- db.Init()
- }
- func main() {
- loopBrief()
- loopPicture()
- }
- func loopBrief() {
- list, err := db.New("t_news").Attr("id,content,url").Where("status", "Brief").GetAll()
- fmt.Println("loopBrief", len(list), err)
- for _, record := range list {
- update := map[string]interface{}{}
- id := record["id"].(int64)
- var content, url, brief string
- if val, flag := record["content"].(string); flag {
- content = val
- }
- if val, flag := record["url"].(string); flag {
- url = val
- }
- if len(content) >= 1024 {
- brief = genBrief(content)
- update["brief"] = brief
- } else {
- res, err := util.GetHtml(url)
- if err != nil {
- fmt.Println("get Html false", err.Error())
- } else {
- content = getContent(res, "body")
- update["content"] = content
- if len(content) >= 1024 {
- brief = genBrief(content)
- update["brief"] = brief
- }
- }
- }
- //
- if brief == "" {
- update["status"] = "Delete"
- } else {
- update["status"] = "Picture"
- tags := genTag(brief)
- if tags != "" {
- alltag := do_insert_tag(id, tags)
- update["tag"] = alltag
- update["pics"] = get_pics(alltag)
- update["status"] = "Publish"
- }
- }
- fmt.Println("finishBrief", id, brief)
- if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
- fmt.Println("save", err.Error())
- }
- }
- }
- func loopPicture() {
- list, _ := db.New("t_news").Attr("id,tag,brief").Where("status", "Picture").GetAll()
- for _, record := range list {
- update := map[string]interface{}{}
- id := record["id"].(int64)
- var brief, tags string
- if val, flag := record["brief"].(string); flag {
- brief = val
- }
- if val, flag := record["tag"].(string); flag {
- tags = val
- }
- if tags == "" {
- tags = genTag(brief)
- if tags != "" {
- alltag := do_insert_tag(id, tags)
- update["tag"] = alltag
- update["pics"] = get_pics(alltag)
- update["status"] = "Publish"
- } else {
- update["status"] = "Delete"
- }
- } else {
- update["pics"] = get_pics(tags)
- update["status"] = "Publish"
- }
- fmt.Println("finishPicture", id, tags)
- if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
- fmt.Println("save", err.Error())
- }
- }
- }
- func genBrief(content string) string {
- client := openai.NewClient("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")
- resp, err := client.CreateChatCompletion(
- context.Background(),
- openai.ChatCompletionRequest{
- Model: openai.GPT3Dot5Turbo,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: openai.ChatMessageRoleUser,
- Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
- },
- },
- },
- )
- if err != nil {
- fmt.Printf("ChatCompletion error: %v\n", err)
- return ""
- }
- return resp.Choices[0].Message.Content
- }
- func genTag(content string) string {
- client := openai.NewClient("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")
- resp, err := client.CreateChatCompletion(
- context.Background(),
- openai.ChatCompletionRequest{
- Model: openai.GPT3Dot5Turbo,
- Messages: []openai.ChatCompletionMessage{
- {
- Role: openai.ChatMessageRoleUser,
- Content: content + "\r\n以上新闻内容属于哪一类 A居民 B商业 C金融 D建筑 E屋内装饰 F其它\r\n仅可选一项或者两项",
- },
- },
- },
- )
- if err != nil {
- fmt.Printf("ChatCompletion error: %v\n", err)
- return ""
- }
- return resp.Choices[0].Message.Content
- }
- func get_pics(tags string) string {
- tag := strings.Split(tags, ",")[0]
- if tag == "" || tag == "other" {
- tag = "gpt"
- }
- var url string
- err := db.New("t_news_img").Attr("url").Where("tag", tag).Order(" ORDER BY RAND()").GetRow().Scan(&url)
- if err != nil {
- fmt.Println("get_pics err", tag, err.Error())
- }
- return url
- }
- func do_insert_tag(id int64, tags string) string {
- mtag := map[string]string{
- "A": "residential",
- "B": "commercial",
- "C": "financial",
- "D": "construction",
- "E": "indoor",
- "F": "other",
- }
- mtags := []string{}
- for opt, tag := range mtag {
- if strings.Contains(tags, opt) {
- record := map[string]interface{}{
- "new_id": id,
- "tag": tag,
- }
- db.Pool().Insert("t_news_tag", record)
- mtags = append(mtags, tag)
- }
- }
- return strings.Join(mtags, ",")
- }
- func getContent(body []byte, dom string) string {
- doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
- doc.Find("script").Remove()
- doc.Find("noscript").Remove()
- if dom == "" {
- dom = "boby"
- }
- br := doc.Find(dom).Text()
- if br == "" {
- br = doc.Find("body").Text()
- }
- return util.TrimHtml(br)
- }
|