main.go 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. package main
  import (
  	"bytes"
  	"context"
  	_ "embed"
  	"encoding/json"
  	"fmt"
  	"os"
  	"strings"

  	"genBrief/db"
  	"genBrief/util"

  	"github.com/PuerkitoBio/goquery"
  	openai "github.com/sashabaranov/go-openai"
  )
  14. //go:embed dom.json
  15. var domJson string
  16. var domMap map[string]string
  17. func init() {
  18. domMap = map[string]string{}
  19. err := json.Unmarshal([]byte(domJson), &domMap)
  20. if err != nil {
  21. panic(err)
  22. }
  23. db.Init()
  24. }
  25. func main() {
  26. loopBrief()
  27. loopPicture()
  28. }
  29. func loopBrief() {
  30. list, err := db.New("t_news").Attr("id,content,url").Where("status", "Brief").GetAll()
  31. fmt.Println("loopBrief", len(list), err)
  32. for _, record := range list {
  33. update := map[string]interface{}{}
  34. id := record["id"].(int64)
  35. var content, url, brief string
  36. if val, flag := record["content"].(string); flag {
  37. content = val
  38. }
  39. if val, flag := record["url"].(string); flag {
  40. url = val
  41. }
  42. if len(content) >= 1024 {
  43. brief = genBrief(content)
  44. update["brief"] = brief
  45. } else {
  46. res, err := util.GetHtml(url)
  47. if err != nil {
  48. fmt.Println("get Html false", err.Error())
  49. } else {
  50. content = getContent(res, "body")
  51. update["content"] = content
  52. if len(content) >= 1024 {
  53. brief = genBrief(content)
  54. update["brief"] = brief
  55. }
  56. }
  57. }
  58. //
  59. if brief == "" {
  60. update["status"] = "Delete"
  61. } else {
  62. update["status"] = "Picture"
  63. tags := genTag(brief)
  64. if tags != "" {
  65. alltag := do_insert_tag(id, tags)
  66. update["tag"] = alltag
  67. update["pics"] = get_pics(alltag)
  68. update["status"] = "Publish"
  69. }
  70. }
  71. fmt.Println("finishBrief", id, brief)
  72. if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
  73. fmt.Println("save", err.Error())
  74. }
  75. }
  76. }
  77. func loopPicture() {
  78. list, _ := db.New("t_news").Attr("id,tag,brief").Where("status", "Picture").GetAll()
  79. for _, record := range list {
  80. update := map[string]interface{}{}
  81. id := record["id"].(int64)
  82. var brief, tags string
  83. if val, flag := record["brief"].(string); flag {
  84. brief = val
  85. }
  86. if val, flag := record["tag"].(string); flag {
  87. tags = val
  88. }
  89. if tags == "" {
  90. tags = genTag(brief)
  91. if tags != "" {
  92. alltag := do_insert_tag(id, tags)
  93. update["tag"] = alltag
  94. update["pics"] = get_pics(alltag)
  95. update["status"] = "Publish"
  96. } else {
  97. update["status"] = "Delete"
  98. }
  99. } else {
  100. update["pics"] = get_pics(tags)
  101. update["status"] = "Publish"
  102. }
  103. fmt.Println("finishPicture", id, tags)
  104. if _, err := db.Pool().Update("t_news", update, map[string]interface{}{"id": id}); err != nil {
  105. fmt.Println("save", err.Error())
  106. }
  107. }
  108. }
  109. func genBrief(content string) string {
  110. client := openai.NewClient("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")
  111. resp, err := client.CreateChatCompletion(
  112. context.Background(),
  113. openai.ChatCompletionRequest{
  114. Model: openai.GPT3Dot5Turbo,
  115. Messages: []openai.ChatCompletionMessage{
  116. {
  117. Role: openai.ChatMessageRoleUser,
  118. Content: content + "\r\n通过以上内容,生成中文概要,文字控制500字以内。",
  119. },
  120. },
  121. },
  122. )
  123. if err != nil {
  124. fmt.Printf("ChatCompletion error: %v\n", err)
  125. return ""
  126. }
  127. return resp.Choices[0].Message.Content
  128. }
  129. func genTag(content string) string {
  130. client := openai.NewClient("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")
  131. resp, err := client.CreateChatCompletion(
  132. context.Background(),
  133. openai.ChatCompletionRequest{
  134. Model: openai.GPT3Dot5Turbo,
  135. Messages: []openai.ChatCompletionMessage{
  136. {
  137. Role: openai.ChatMessageRoleUser,
  138. Content: content + "\r\n以上新闻内容属于哪一类 A居民 B商业 C金融 D建筑 E屋内装饰 F其它\r\n仅可选一项或者两项",
  139. },
  140. },
  141. },
  142. )
  143. if err != nil {
  144. fmt.Printf("ChatCompletion error: %v\n", err)
  145. return ""
  146. }
  147. return resp.Choices[0].Message.Content
  148. }
  149. func get_pics(tags string) string {
  150. tag := strings.Split(tags, ",")[0]
  151. if tag == "" || tag == "other" {
  152. tag = "gpt"
  153. }
  154. var url string
  155. err := db.New("t_news_img").Attr("url").Where("tag", tag).Order(" ORDER BY RAND()").GetRow().Scan(&url)
  156. if err != nil {
  157. fmt.Println("get_pics err", tag, err.Error())
  158. }
  159. return url
  160. }
  161. func do_insert_tag(id int64, tags string) string {
  162. mtag := map[string]string{
  163. "A": "residential",
  164. "B": "commercial",
  165. "C": "financial",
  166. "D": "construction",
  167. "E": "indoor",
  168. "F": "other",
  169. }
  170. mtags := []string{}
  171. for opt, tag := range mtag {
  172. if strings.Contains(tags, opt) {
  173. record := map[string]interface{}{
  174. "new_id": id,
  175. "tag": tag,
  176. }
  177. db.Pool().Insert("t_news_tag", record)
  178. mtags = append(mtags, tag)
  179. }
  180. }
  181. return strings.Join(mtags, ",")
  182. }
  183. func getContent(body []byte, dom string) string {
  184. doc, _ := goquery.NewDocumentFromReader(bytes.NewReader(body))
  185. doc.Find("script").Remove()
  186. doc.Find("noscript").Remove()
  187. if dom == "" {
  188. dom = "boby"
  189. }
  190. br := doc.Find(dom).Text()
  191. if br == "" {
  192. br = doc.Find("body").Text()
  193. }
  194. return util.TrimHtml(br)
  195. }