go查漏补缺2.0

Last updated on 3 months ago

起因是在看字节青训营的时候刷到了福州大学的一个up的项目视频,跟着评论区偶然发现了他们学校的西二在线工作室的组织仓库,看了golang的学习路线,大为震撼,所以打算跟着学一遍
https://github.com/west2-online/

基础语法

洛谷

1046

https://www.luogu.com.cn/problem/P1046

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
package main

import (
"fmt"
)

// main reads ten integers followed by one more integer h from standard
// input, raises h by 30, and prints how many of the ten values are less than
// or equal to the raised h.
func main() {
	const total = 10
	heights := make([]int, 0, total)
	for len(heights) < total {
		var height int
		fmt.Scan(&height)
		heights = append(heights, height)
	}

	var reach int
	fmt.Scan(&reach)
	reach += 30

	picked := 0
	for i := range heights {
		if heights[i] > reach {
			continue
		}
		picked++
	}
	fmt.Println(picked)
}

5737

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
package main

import (
"fmt"
)

// IsLeap reports whether year is a leap year: divisible by 400, or divisible
// by 4 but not by 100.
func IsLeap(year int) bool {
	switch {
	case year%400 == 0:
		return true
	case year%100 == 0:
		return false
	default:
		return year%4 == 0
	}
}
// main reads two years x and y from standard input and prints the number of
// leap years in [x, y] on the first line, followed by the leap years
// themselves separated by spaces.
//
// The range is only processed when 1582 <= x < y <= 3000; otherwise the
// count printed is 0 and no years are listed.
func main() {
	var x, y int
	fmt.Scan(&x)
	fmt.Scan(&y)

	var leaps []int
	// Validate before allocating: sizing the slice from y-x with y < x would
	// ask make for a negative capacity and panic.
	if x >= 1582 && y <= 3000 && x < y {
		// At most one leap year every four years, plus one for the boundary.
		leaps = make([]int, 0, (y-x)/4+1)
		for year := x; year <= y; year++ {
			if IsLeap(year) {
				leaps = append(leaps, year)
			}
		}
	}

	fmt.Println(len(leaps))
	for _, year := range leaps {
		fmt.Print(year, " ")
	}
}

IsPrime

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
package main

import (
"fmt"
)

// IsPrime reports whether num is a prime number.
//
// Values below 2 (0, 1 and all negatives) are not prime. For larger values
// trial division is used with every candidate divisor i such that i*i <= num.
func IsPrime(num int) bool {
	// Without this guard the loop below never runs for num < 2 and the
	// function would wrongly fall through to "true".
	if num < 2 {
		return false
	}
	for i := 2; i*i <= num; i++ {
		if num%i == 0 {
			return false
		}
	}
	return true
}
// main builds the list of primes in [17, 1000000), reads one integer from
// standard input and prints "YES" when that integer is in the list and "NO"
// otherwise.
//
// NOTE(review): the table starts at 17, so the primes 2, 3, 5, 7, 11 and 13
// are answered with "NO" — confirm this lower bound is intended.
func main() {
	// The number of primes is not known up front, so let append grow the
	// slice instead of reserving one million slots.
	var primes []int
	for i := 17; i < 1000000; i++ {
		if IsPrime(i) {
			primes = append(primes, i)
		}
	}

	var x int
	fmt.Scan(&x)

	for _, p := range primes {
		if p == x {
			fmt.Println("YES")
			return
		}
	}
	// Reaching this point means no table entry matched; a separate "found"
	// flag is unnecessary because a match returns immediately above.
	fmt.Println("NO")
}

跑了200ms….感觉还能优化,没必要创字典

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
package main

import (
"fmt"
)

// IsPrime reports whether num is a prime number.
//
// Values below 2 (0, 1 and all negatives) are not prime. For larger values
// trial division is used with every candidate divisor i such that i*i <= num.
func IsPrime(num int) bool {
	// Without this guard the loop below never runs for num < 2 and the
	// function would wrongly report such inputs as prime.
	if num < 2 {
		return false
	}
	for i := 2; i*i <= num; i++ {
		if num%i == 0 {
			return false
		}
	}
	return true
}
// main reads one integer from standard input and prints "YES" when IsPrime
// accepts it and "NO" otherwise.
func main() {
	var x int
	fmt.Scan(&x)

	answer := "NO"
	if IsPrime(x) {
		answer = "YES"
	}
	fmt.Println(answer)
}

114514

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21

package main

// IsMultiple reports whether num is divisible by 3.
func IsMultiple(num int) bool {
	remainder := num % 3
	return remainder == 0
}
// main fills a slice with 1..50, removes every multiple of 3, appends 114514
// and prints the remaining values one per line.
func main() {
	arr := make([]int, 0, 60)
	for i := 1; i <= 50; i++ {
		arr = append(arr, i)
	}

	// Filter in place, reusing arr's backing array. Deleting with
	// append(arr[:i], arr[i+1:]...) inside a forward index loop skips the
	// element that slides into position i; that only went unnoticed because
	// two multiples of 3 are never adjacent.
	kept := arr[:0]
	for _, v := range arr {
		if !IsMultiple(v) {
			kept = append(kept, v)
		}
	}

	arr = append(kept, 114514)
	for _, v := range arr {
		println(v)
	}
}

bonus

本博客

爬虫

原理

创建请求并发送

构造客户端 (var client http.Client)
构造http请求 (GET POST) http.NewRequest("GET", url, nil)
设置请求头

1
2
3
4
5
6
7
8
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Pragma", "no-cache")
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")

发送请求(defer 关闭连接)
client.Do(req)

选择元素

  • CSS选择器
  • ```go get github.com/PuerkitoBio/goquery``` F12,选择元素,右键,复制,复制selector
  • XPath语法
  • ```go get github.com/antchfx/htmlquery``` F12,选择元素,右键,复制,复制XPath/完整XPath
  • 正则表达式
  • 原生regexp不支持Perl语法

获取节点信息

保存到本地/数据库

提前定义数据结构

1
2
3
4
5
6
// TableName returns the fixed table name "movies" for Movie.
func (m *Movie) TableName() string {
	const name = "movies"
	return name
}
//这里的这个函数的意思是,在创建数据表单时,用movies当作表名(使用DB.AutoMigrate(&Movie{})时会根据结构体的名字自动创建表名,这里是人为定义一下)
DB.AutoMigrate(&Movie{})
//这个是根据结构体自动创建一个表单

在定义gorm的数据库连接对象时,err要提前定义

在已经定义全局变量
var DB *gorm.DB
后,如果再
DB,err:=gorm.Open(mysql.Open(path),&gorm.Config{})
这里看似只声明并定义了新变量err,但实际上:=会在当前作用域里把DB也重新声明一遍,也就是说这里的DB不再是全局变量DB,而是初始化数据库连接的函数中的局部变量DB(全局的DB仍然是nil)
正确方法是
var err error
DB,err=gorm.Open(mysql.Open(path),&gorm.Config{})

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
package main

import (
"fmt"
"github.com/PuerkitoBio/goquery"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"log"
"net/http"
"regexp"
"strings"
)

var (
DB *gorm.DB
count int
)

// Movie is the database row stored for one scraped list entry.
// It embeds gorm.Model; the string fields below carry json and gorm tags.
type Movie struct {
gorm.Model
// Title is the movie title; stored as varchar(255), not null.
Title string `json:"title" gorm:"type:varchar(255);not null;"`
// Img is the poster image URL; stored as varchar(256), not null.
Img string `json:"img" gorm:"type:varchar(256);not null;"`
// Rank is the rating text; stored as varchar(256), not null.
Rank string `json:"rank" gorm:"type:varchar(256);not null;"`
// Desc is the one-line quote; stored as varchar(256), not null.
Desc string `json:"desc" gorm:"type:varchar(256);not null;"`
// Tags, Author, Actor and Time are the values returned by InfoSplit.
Tags string `json:"tags"`
Author string `json:"author"`
Actor string `json:"actor"`
Time string `json:"time"`
}

// main opens the database connection and then scrapes ten list pages, passing
// the offsets 0, 25, ..., 225 to Spider as decimal strings.
func main() {
	InitDB()
	for page := 0; page < 10; page++ {
		offset := page * 25
		Spider(fmt.Sprintf("%d", offset))
	}
}
// Err terminates the program through log.Fatalln when err is non-nil,
// logging str, the literal "is: " and the error. A nil err is a no-op.
func Err(err error, str string) {
	if err == nil {
		return
	}
	log.Fatalln(str, "is: ", err)
}
// Spider downloads one page of https://movie.douban.com/top250 and stores
// every list entry that has a poster image through InsertDB, printing each
// stored record.
//
// page is appended verbatim to the "start=" query parameter. Every failure
// (building the request, sending it, a non-200 response, parsing the HTML,
// inserting a row) ends the program via Err / log.Fatalln.
func Spider(page string) {
	client := http.Client{}
	url := `https://movie.douban.com/top250?start=`
	req, err := http.NewRequest("GET", url+page, nil)
	Err(err, "创建连接失败")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Pragma", "no-cache")
	req.Header.Set("Cache-Control", "no-cache")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")

	resp, err := client.Do(req)
	Err(err, "请求连接失败")
	// Register the close as soon as the response is known to be valid.
	defer resp.Body.Close()
	// A non-200 answer would otherwise be parsed as a page with zero movies
	// and the run would silently store nothing.
	if resp.StatusCode != http.StatusOK {
		Err(fmt.Errorf("unexpected HTTP status %s", resp.Status), "请求连接失败")
	}

	// Parse the page.
	docs, err := goquery.NewDocumentFromReader(resp.Body)
	Err(err, "解析网页失败")

	// orNone substitutes the placeholder "none" for an empty value.
	orNone := func(s string) string {
		if s == "" {
			return "none"
		}
		return s
	}

	// Each <li> of the list is one movie, e.g.
	// #content > div > div.article > ol > li:nth-child(1) > div > div.info > div.hd > a > span:nth-child(1)
	docs.Find("#content > div > div.article > ol > li").
		Each(func(i int, s *goquery.Selection) {
			img, ok := s.Find("div > div.pic > a > img").Attr("src")
			if !ok {
				// Entries without a poster "src" attribute are skipped.
				return
			}
			count++

			title := s.Find("div > div.info > div.hd > a > span:nth-child(1)").Text()
			info := s.Find("div > div.info > div.bd > p:nth-child(1)").Text()
			rank := s.Find("div > div.info > div.bd > div > span.rating_num").Text()
			desc := s.Find("div > div.info > div.bd > p.quote > span").Text()
			author, actor, year, tags := InfoSplit(info)

			data := Movie{
				Title:  orNone(title),
				Img:    orNone(img),
				Author: orNone(author),
				Actor:  orNone(actor),
				Time:   orNone(year),
				Tags:   orNone(tags),
				Rank:   orNone(rank),
				Desc:   orNone(desc),
			}
			InsertDB(&data)
			fmt.Println(data)
		})
}
// Patterns used by InfoSplit. They are compiled once at start-up instead of
// on every call; MustCompile is safe here because the patterns are constants.
var (
	// authorRe matches from "导演:" up to the last run of three separator
	// characters on the same line.
	// NOTE(review): the separator in the scraped text may be non-breaking
	// spaces (U+00A0) rather than ASCII spaces, so both are accepted —
	// confirm against the live page.
	authorRe = regexp.MustCompile(`导演:.*[ \x{00A0}]{3}`)
	// actorRe matches from "主演:" to the end of that line.
	actorRe = regexp.MustCompile(`主演:.*`)
	// timeRe matches the first run of decimal digits.
	timeRe = regexp.MustCompile(`\d+`)
	// tagsRe captures the text after the last "/" of the whole input.
	tagsRe = regexp.MustCompile(`/([^/]+)$`)
)

// InfoSplit extracts four pieces of text from the info paragraph of a list
// entry.
//
// Returns:
//   - author: the text after "导演:" up to the three-character separator,
//     with surrounding whitespace removed.
//   - actor: the text after "主演:" up to the first "/" (or the end of the
//     line), with surrounding whitespace removed.
//   - time: the first run of digits found in info.
//   - tags: the text after the last "/" in info, with surrounding whitespace
//     removed. The slash itself is not part of the result.
//
// Any value whose pattern does not match is returned as "".
func InfoSplit(info string) (author, actor, time, tags string) {
	// Director.
	author = strings.TrimPrefix(authorRe.FindString(info), "导演:")
	author = strings.TrimSpace(author)

	// Lead actor: keep only the first "/"-separated name.
	actor = strings.TrimPrefix(actorRe.FindString(info), "主演:")
	if slash := strings.Index(actor, "/"); slash >= 0 {
		actor = actor[:slash]
	}
	actor = strings.TrimSpace(actor)

	// Year.
	time = timeRe.FindString(info)

	// Tags: use the capture group so the leading "/" is not kept.
	if m := tagsRe.FindStringSubmatch(info); m != nil {
		tags = strings.TrimSpace(m[1])
	}

	return author, actor, time, tags
}
// InitDB opens the MySQL connection, stores it in the package-level DB and
// creates or updates the table for Movie.
//
// DB is assigned with "=" (err is declared separately) so that the
// package-level variable is set rather than shadowed by a new local.
// A failed connection or migration ends the program via Err.
func InitDB() {
	// NOTE(review): credentials are hard-coded in the DSN; consider reading
	// them from configuration or the environment.
	path := "root:root@tcp(127.0.0.1:3306)/douban?charset=utf8mb4&parseTime=True&loc=Local"
	var err error
	DB, err = gorm.Open(mysql.Open(path), &gorm.Config{})
	Err(err, "数据库连接失败")
	// AutoMigrate reports failures through its return value; do not drop it.
	Err(DB.AutoMigrate(&Movie{}), "数据库迁移失败")
	fmt.Println("数据库连接成功")
}
// InsertDB inserts data through the package-level DB and hands any resulting
// error to Err with the message "insert failed".
func InsertDB(data *Movie) {
	Err(DB.Create(data).Error, "insert failed")
}

// TableName returns the fixed table name "movies" for Movie.
func (m *Movie) TableName() string {
	const name = "movies"
	return name
}

爬评论

难点有3个,
第一是评论是通过api来加载的,也就是说正常情况是滑到哪里哪里的评论加载出来(类似懒加载?),所以需要一直滑网页进度条同时关注f12中的fetch/XHR,其中有几个才是要访问的url
第二是记得设置请求头,只按照 小生凡一 给的请求头设置参数会出问题(记得加cookie),同时
Sec-Fetch-Dest: document
Accept-Encoding: gzip, deflate, br
这俩加上的话就会爬不出内容,看不懂为什么
最后一个是我没看过国王排名,爬辉夜的吧

并发爬虫过段时间再搞吧,先把中间件鉴权给写完

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
package main

import (
"fmt"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"io"
"log"
"net/http"
"regexp"
"strings"
)

var (
DB *gorm.DB
)

// Err terminates the program through log.Fatalln when err is non-nil,
// logging str, the literal "is: " and the error. A nil err is a no-op.
func Err(err error, str string) {
	if err == nil {
		return
	}
	log.Fatalln(str, "is: ", err)
}

// Comment is the database row stored for one scraped comment.
// It embeds gorm.Model.
type Comment struct {
gorm.Model
// Name is filled from a `"uname":"..."` match with the `"uname":` prefix removed.
Name string `json:"name"`
// Message is filled from a `"message":"..."` match with the `"message":` prefix removed.
Message string `json:"message"`
}

// main opens the database connection and passes each hard-coded comment API
// URL to Spider together with its 1-based position in the list.
func main() {
	InitDB()
	urls := []string{
		"https://api.bilibili.com/x/v2/reply/wbi/main?oid=40559171&type=1&mode=3&pagination_str=%7B%22offset%22:%22%7B%5C%22type%5C%22:1,%5C%22direction%5C%22:1,%5C%22session_id%5C%22:%5C%221740848790700856%5C%22,%5C%22data%5C%22:%7B%7D%7D%22%7D&plat=1&web_location=1315875&w_rid=7e2b3f24bb98af6f95615b6497da021c&wts=1700047667",
		"https://api.bilibili.com/x/v2/reply/wbi/main?oid=40559171&type=1&mode=3&pagination_str=%7B%22offset%22:%22%7B%5C%22type%5C%22:1,%5C%22direction%5C%22:1,%5C%22session_id%5C%22:%5C%221740848790700856%5C%22,%5C%22data%5C%22:%7B%7D%7D%22%7D&plat=1&web_location=1315875&w_rid=359566361b21a402fb41e3d8ee81c872&wts=1700047670",
	}
	for i := range urls {
		pageNo := i + 1
		Spider(urls[i], pageNo)
	}
}
// Spider fetches one comment API response from url, extracts the
// `"uname":"..."` and `"message":"..."` fragments with nameSplit and
// messageSplit, pairs them by position and stores each pair as a Comment.
//
// index is only used in the progress line printed to standard output.
// Every failure (building the request, sending it, a non-200 response,
// reading the body, inserting a row) ends the program via Err.
//
// NOTE(review): names and messages are paired purely by match order; if the
// response contains a "uname" or "message" key outside a comment the pairs
// drift apart. The stored values also keep their surrounding double quotes.
// Decoding the JSON would avoid both — confirm the response schema first.
func Spider(url string, index int) {
	var client http.Client
	req, err := http.NewRequest("GET", url, nil)
	Err(err, "创建连接失败")

	req.Header.Set("sec-ch-ua", "\"Not_A Brand\";v=\"8\", \"Chromium\";v=\"120\", \"Microsoft Edge\";v=\"120\"")
	req.Header.Set("sec-ch-ua-mobile", "?0")
	req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
	req.Header.Set("Sec-Fetch-Site", "none")
	req.Header.Set("Sec-Fetch-Mode", "navigate")
	req.Header.Set("Sec-Fetch-User", "?1")
	// Left disabled on purpose. Setting Accept-Encoding by hand turns off
	// net/http's transparent gzip decoding, so the body would arrive
	// compressed and the regexes below would find nothing.
	//req.Header.Set("Sec-Fetch-Dest", "document")
	//req.Header.Set("Accept-Encoding", "gzip, deflate, br")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7")
	req.Header.Set("Cookie", "*******************************************************")

	resp, err := client.Do(req)
	Err(err, "发起连接失败")
	defer resp.Body.Close()
	// A non-200 answer would otherwise be scanned as a page with no comments.
	if resp.StatusCode != http.StatusOK {
		Err(fmt.Errorf("unexpected HTTP status %s", resp.Status), "发起连接失败")
	}

	fmt.Println("正在爬取第", index, "页的评论")
	docBytes, err := io.ReadAll(resp.Body)
	Err(err, "读取网页失败")
	docs := string(docBytes)

	messages, messagesLen := messageSplit(docs)
	names, namesLen := nameSplit(docs)

	// Only as many pairs as the shorter list provides can be stored.
	pairs := messagesLen
	if namesLen < pairs {
		pairs = namesLen
	}
	for i := 0; i < pairs; i++ {
		comment := Comment{
			Name:    strings.TrimPrefix(names[i], "\"uname\":"),
			Message: strings.TrimPrefix(messages[i], "\"message\":"),
		}
		// Do not drop insert failures silently.
		Err(DB.Create(&comment).Error, "保存评论失败")
	}
}
// messageRe matches one `"message":"..."` fragment. It is compiled once at
// start-up; the pattern is constant, so MustCompile cannot fail at run time.
// NOTE(review): the lazy `.*?"` stops at the first double quote, so a message
// containing an escaped quote (\") is cut short.
var messageRe = regexp.MustCompile(`"message":".*?"`)

// messageSplit returns every `"message":"..."` fragment found in docs, in
// order of appearance, together with the number of fragments.
// When nothing matches it returns a nil slice and 0.
func messageSplit(docs string) ([]string, int) {
	matches := messageRe.FindAllString(docs, -1)
	return matches, len(matches)
}
// InitDB opens the MySQL connection, stores it in the package-level DB and
// creates or updates the table for Comment.
//
// DB is assigned with "=" (err is declared separately) so that the
// package-level variable is set rather than shadowed by a new local.
// A failed connection or migration ends the program via Err.
func InitDB() {
	// NOTE(review): credentials are hard-coded in the DSN; consider reading
	// them from configuration or the environment.
	path := "root:root@tcp(127.0.0.1:3306)/HuiYe?charset=utf8mb4&parseTime=True&loc=Local"
	var err error
	DB, err = gorm.Open(mysql.Open(path), &gorm.Config{})
	Err(err, "数据库连接失败")
	// AutoMigrate reports failures through its return value; do not drop it.
	Err(DB.AutoMigrate(&Comment{}), "数据库迁移失败")
	fmt.Println("数据库连接成功")
}
// unameRe matches one `"uname":"..."` fragment. It is compiled once at
// start-up; the pattern is constant, so MustCompile cannot fail at run time.
var unameRe = regexp.MustCompile(`"uname":".*?"`)

// nameSplit returns every `"uname":"..."` fragment found in docs, in order of
// appearance, together with the number of fragments.
// When nothing matches it returns a nil slice and 0.
func nameSplit(docs string) ([]string, int) {
	matches := unameRe.FindAllString(docs, -1)
	return matches, len(matches)
}

并发爬虫

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
package main

import (
"fmt"
"github.com/PuerkitoBio/goquery"
"gorm.io/driver/mysql"
"gorm.io/gorm"
"log"
"net/http"
"regexp"
"strings"
"time"
)

var (
DB *gorm.DB
count int
)

// Movie is the database row stored for one scraped list entry.
// It embeds gorm.Model; the string fields below carry json and gorm tags.
type Movie struct {
gorm.Model
// Title is the movie title; stored as varchar(255), not null.
Title string `json:"title" gorm:"type:varchar(255);not null;"`
// Img is the poster image URL; stored as varchar(256), not null.
Img string `json:"img" gorm:"type:varchar(256);not null;"`
// Rank is the rating text; stored as varchar(256), not null.
Rank string `json:"rank" gorm:"type:varchar(256);not null;"`
// Desc is the one-line quote; stored as varchar(256), not null.
Desc string `json:"desc" gorm:"type:varchar(256);not null;"`
// Tags, Author, Actor and Time are the values returned by InfoSplit.
Tags string `json:"tags"`
Author string `json:"author"`
Actor string `json:"actor"`
Time string `json:"time"`
}

// main scrapes the ten list pages twice — first one after another with
// Spider, then concurrently with one goroutine per page — and prints both
// durations and their ratio.
//
// NOTE(review): both passes insert the same movies, so every row ends up in
// the table twice.
func main() {
	InitDB()

	// Sequential pass.
	start := time.Now()
	for i := 0; i < 10; i++ {
		Spider(fmt.Sprintf("%d", i*25))
	}
	end := time.Since(start)

	// Concurrent pass: one goroutine per page, each reporting on done only
	// after its page has been processed completely.
	st := time.Now()
	done := make(chan struct{})
	for i := 0; i < 10; i++ {
		num := fmt.Sprintf("%d", i*25)
		go func() {
			// A nil channel makes Spider2 skip its own notifications, so
			// completion is tracked per page here and not per movie.
			Spider2(num, nil)
			done <- struct{}{}
		}()
	}
	for i := 0; i < 10; i++ {
		<-done
	}
	ed := time.Since(st)

	// Earlier recorded figures: 3.8124319s sequential and 468.1153ms
	// concurrent. The concurrent figure was taken while main stopped waiting
	// after the first ten notifications, so it understates the real time.
	fmt.Println("正常爬虫耗时:", end)
	fmt.Println("go协程爬虫耗时:", ed)
	// Convert to float64 first: dividing two Durations is an integer
	// division whose result would be printed as a Duration.
	fmt.Println("爬取速率比: ", float64(end)/float64(ed))
}
// Err terminates the program through log.Fatalln when err is non-nil,
// logging str, the literal "is: " and the error. A nil err is a no-op.
func Err(err error, str string) {
	if err == nil {
		return
	}
	log.Fatalln(str, "is: ", err)
}
// Spider downloads one page of https://movie.douban.com/top250 and stores
// every list entry that has a poster image through InsertDB, printing each
// stored record.
//
// page is appended verbatim to the "start=" query parameter. Every failure
// (building the request, sending it, a non-200 response, parsing the HTML,
// inserting a row) ends the program via Err / log.Fatalln.
func Spider(page string) {
	client := http.Client{}
	url := `https://movie.douban.com/top250?start=`
	req, err := http.NewRequest("GET", url+page, nil)
	Err(err, "创建连接失败")
	req.Header.Set("Connection", "keep-alive")
	req.Header.Set("Pragma", "no-cache")
	req.Header.Set("Cache-Control", "no-cache")
	req.Header.Set("Upgrade-Insecure-Requests", "1")
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
	req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
	req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")

	resp, err := client.Do(req)
	Err(err, "请求连接失败")
	// Register the close as soon as the response is known to be valid.
	defer resp.Body.Close()
	// A non-200 answer would otherwise be parsed as a page with zero movies
	// and the run would silently store nothing.
	if resp.StatusCode != http.StatusOK {
		Err(fmt.Errorf("unexpected HTTP status %s", resp.Status), "请求连接失败")
	}

	// Parse the page.
	docs, err := goquery.NewDocumentFromReader(resp.Body)
	Err(err, "解析网页失败")

	// orNone substitutes the placeholder "none" for an empty value.
	orNone := func(s string) string {
		if s == "" {
			return "none"
		}
		return s
	}

	// Each <li> of the list is one movie, e.g.
	// #content > div > div.article > ol > li:nth-child(1) > div > div.info > div.hd > a > span:nth-child(1)
	docs.Find("#content > div > div.article > ol > li").
		Each(func(i int, s *goquery.Selection) {
			img, ok := s.Find("div > div.pic > a > img").Attr("src")
			if !ok {
				// Entries without a poster "src" attribute are skipped.
				return
			}
			count++

			title := s.Find("div > div.info > div.hd > a > span:nth-child(1)").Text()
			info := s.Find("div > div.info > div.bd > p:nth-child(1)").Text()
			rank := s.Find("div > div.info > div.bd > div > span.rating_num").Text()
			desc := s.Find("div > div.info > div.bd > p.quote > span").Text()
			author, actor, year, tags := InfoSplit(info)

			data := Movie{
				Title:  orNone(title),
				Img:    orNone(img),
				Author: orNone(author),
				Actor:  orNone(actor),
				Time:   orNone(year),
				Tags:   orNone(tags),
				Rank:   orNone(rank),
				Desc:   orNone(desc),
			}
			InsertDB(&data)
			fmt.Println(data)
		})
}
func Spider2(page string, ch chan bool) {
client := http.Client{}
url := `https://movie.douban.com/top250?start=`
req, err := http.NewRequest("GET", url+page, nil)
Err(err, "创建连接失败")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Pragma", "no-cache")
req.Header.Set("Cache-Control", "no-cache")
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9")

resp, err := client.Do(req)
Err(err, "请求连接失败")
//解析网页
docs, err := goquery.NewDocumentFromReader(resp.Body)

defer resp.Body.Close()

Err(err, "解析网页失败")

//获取节点信息
//#content > div > div.article > ol > li:nth-child(1) > div > div.info > div.hd > a > span:nth-child(1)
docs.Find("#content > div > div.article > ol > li").
Each(func(i int, s *goquery.Selection) {
title := s.Find("div > div.info > div.hd > a > span:nth-child(1)").Text()
imgTag := s.Find("div > div.pic > a > img")
img, ok := imgTag.Attr("src")
info := s.Find("div > div.info > div.bd > p:nth-child(1)").Text()
rank := s.Find("div > div.info > div.bd > div > span.rating_num").Text()
desc := s.Find("div > div.info > div.bd > p.quote > span").Text()
if ok {
count++
author, actor, time, tags := InfoSplit(info)
if title == "" {
title = "none"
}
if img == "" {
img = "none"
}
if author == "" {
author = "none"
}
if actor == "" {
actor = "none"
}
if time == "" {
time = "none"
}
if tags == "" {
tags = "none"
}
if rank == "" {
rank = "none"
}
if desc == "" {
desc = "none"
}
data := Movie{
Title: title,
Img: img,
Author: author,
Actor: actor,
Time: time,
Tags: tags,
Rank: rank,
Desc: desc,
}
InsertDB(&data)
fmt.Println(data)
if ch != nil {
ch <- true
}
}
})
}
// Patterns used by InfoSplit. They are compiled once at start-up instead of
// on every call; MustCompile is safe here because the patterns are constants.
var (
	// authorRe matches from "导演:" up to the last run of three separator
	// characters on the same line.
	// NOTE(review): the separator in the scraped text may be non-breaking
	// spaces (U+00A0) rather than ASCII spaces, so both are accepted —
	// confirm against the live page.
	authorRe = regexp.MustCompile(`导演:.*[ \x{00A0}]{3}`)
	// actorRe matches from "主演:" to the end of that line.
	actorRe = regexp.MustCompile(`主演:.*`)
	// timeRe matches the first run of decimal digits.
	timeRe = regexp.MustCompile(`\d+`)
	// tagsRe captures the text after the last "/" of the whole input.
	tagsRe = regexp.MustCompile(`/([^/]+)$`)
)

// InfoSplit extracts four pieces of text from the info paragraph of a list
// entry.
//
// Returns:
//   - author: the text after "导演:" up to the three-character separator,
//     with surrounding whitespace removed.
//   - actor: the text after "主演:" up to the first "/" (or the end of the
//     line), with surrounding whitespace removed.
//   - time: the first run of digits found in info.
//   - tags: the text after the last "/" in info, with surrounding whitespace
//     removed. The slash itself is not part of the result.
//
// Any value whose pattern does not match is returned as "".
func InfoSplit(info string) (author, actor, time, tags string) {
	// Director.
	author = strings.TrimPrefix(authorRe.FindString(info), "导演:")
	author = strings.TrimSpace(author)

	// Lead actor: keep only the first "/"-separated name.
	actor = strings.TrimPrefix(actorRe.FindString(info), "主演:")
	if slash := strings.Index(actor, "/"); slash >= 0 {
		actor = actor[:slash]
	}
	actor = strings.TrimSpace(actor)

	// Year.
	time = timeRe.FindString(info)

	// Tags: use the capture group so the leading "/" is not kept.
	if m := tagsRe.FindStringSubmatch(info); m != nil {
		tags = strings.TrimSpace(m[1])
	}

	return author, actor, time, tags
}
// InitDB opens the MySQL connection, stores it in the package-level DB and
// creates or updates the table for Movie.
//
// DB is assigned with "=" (err is declared separately) so that the
// package-level variable is set rather than shadowed by a new local.
// A failed connection or migration ends the program via Err.
func InitDB() {
	// NOTE(review): credentials are hard-coded in the DSN; consider reading
	// them from configuration or the environment.
	path := "root:root@tcp(127.0.0.1:3306)/douban?charset=utf8mb4&parseTime=True&loc=Local"
	var err error
	DB, err = gorm.Open(mysql.Open(path), &gorm.Config{})
	Err(err, "数据库连接失败")
	// AutoMigrate reports failures through its return value; do not drop it.
	Err(DB.AutoMigrate(&Movie{}), "数据库迁移失败")
	fmt.Println("数据库连接成功")
}
// InsertDB inserts data through the package-level DB and hands any resulting
// error to Err with the message "insert failed".
func InsertDB(data *Movie) {
	Err(DB.Create(data).Error, "insert failed")
}

// TableName returns the fixed table name "movies" for Movie.
func (m *Movie) TableName() string {
	const name = "movies"
	return name
}

备忘录

仓库
https://github.com/taosu0216/go_stu/tree/main/memorandum

hertz

这玩意有点怪,官网给的文档是直接脚手架(用不大习惯),但也有能直接调用的api

创建一个helloworld

需要先
go get github.com/cloudwego/hertz/pkg/app
Default会使用默认中间件,想全自己写就是New
默认开放端口就是8888,如果想改的话就得像下面这样(这个改端口官方也没教程,还是掘金别人的文章看到才知道的,也不知道他们是在哪里学的)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
package main

import (
"context"

"github.com/cloudwego/hertz/pkg/app"
"github.com/cloudwego/hertz/pkg/app/server"
"github.com/cloudwego/hertz/pkg/protocol/consts"
)

func main() {
// server.Default() creates a Hertz with recovery middleware.
// If you need a pure hertz, you can use server.New()
h := server.Default(server.WithHostPorts("127.0.0.1:8083"))
h.GET("/hello", func(ctx context.Context, c *app.RequestContext) {
c.String(consts.StatusOK, "Hello hertz!")
})

h.Spin()
}

找着改端口的例子了
https://github.com/cloudwego/hertz-examples/blob/main/config/main.go

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
* Copyright 2022 CloudWeGo Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package main

import (
"context"

"github.com/cloudwego/hertz/pkg/app"
"github.com/cloudwego/hertz/pkg/app/server"
"github.com/cloudwego/hertz/pkg/network/standard"
"github.com/cloudwego/hertz/pkg/protocol/consts"
)

func main() {
// The default listening port is 8888.
// You can modify it with server.WithHostPorts().
h := server.Default(
server.WithHostPorts("127.0.0.1:8080"),
server.WithMaxRequestBodySize(20<<20),
server.WithTransport(standard.NewTransporter),
)

h.GET("/hello", func(ctx context.Context, c *app.RequestContext) {
c.String(consts.StatusOK, "Hello hertz!")
})

h.Spin()

jwt

啃了相当一段时间hertz的jwt的文档,卡住的主要原因是之前用的gin的jwt,都是自己手动写分发token,解析token等一堆内容,现在用了hertz之后,在官方文档和demo仓库里一直没找着究竟是在哪里分发的token,硬是把jwt那个文档都啃了一遍都没发现究竟是在哪里生成的token,最后才发现token是自动生成并分发的…
当用户登陆后,在登陆成功后会由hertz的框架自动返回一个token,而这个token需要加在Authorization里,以**** ************.*********.*********的格式才能鉴权成功,但是gin里面的token默认是没有token前面的字段的(这里默认是Bearer,我在自己程序中修改为taosu了)

swagger

跟gin的swagger大差不差,就是要注意这一行 _ "memorandum/docs"
这里不是官方文档提供的内容,要修改成自己项目目录中的docs(我还因为这个去提了个蠢蛋issue)

注意事项

注意用DB时要提前定义,不能:=
注意要初始化定义的jwt函数
注意使用DB时最好加上Table
注意gorm操作数据库的操作都是根据主键(gorm.Model)来的,也就是说如果一个表单没有主键,gorm无法操作(应该是这样的)

docker

最喜欢的docker环节!
一开始打算照着go学后端的一个理由就是听说go能对docker进行二开,之前的大部分捣鼓范畴就是找个好玩的docker镜像然后pull下来再run,现在终于轮到我自己来整镜像了

dockerfile

参考 https://juejin.cn/post/7207336474149552165

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
FROM	指定基础镜像,用于后续的指令构建。
MAINTAINER 指定Dockerfile的作者/维护者。(已弃用,推荐使用LABEL指令)
LABEL 添加镜像的元数据,使用键值对的形式。
RUN 在构建过程中在镜像中执行命令。
CMD 指定容器创建时的默认命令。(可以被覆盖)
ENTRYPOINT 设置容器创建时的主要命令。(不会被docker run后面的参数覆盖,只能用--entrypoint显式覆盖)
EXPOSE 声明容器运行时监听的特定网络端口。
ENV 在容器内部设置环境变量。
ADD 将文件、目录或远程URL复制到镜像中。
COPY 将文件或目录复制到镜像中。
VOLUME 为容器创建挂载点或声明卷。
WORKDIR 设置后续指令的工作目录。
USER 指定后续指令的用户上下文。
ARG 定义在构建过程中传递给构建器的变量,可使用 "docker build" 命令设置。
ONBUILD 当该镜像被用作另一个构建过程的基础时,添加触发器。
STOPSIGNAL 设置发送给容器以退出的系统调用信号。
HEALTHCHECK 定义周期性检查容器健康状态的命令。
SHELL 覆盖Docker中默认的shell,用于RUN、CMD和ENTRYPOINT指令。

只能说入门教程还得是菜鸟教程,很适合我这种菜鸟

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Build stage: compile the memorandum binary with the Go toolchain.
FROM golang:alpine AS build
LABEL authors="taosu"
# CGO_ENABLED=0 produces a statically linked binary; the final stage is
# "scratch", which contains no C library for a dynamically linked one to load.
ENV GOPROXY=https://goproxy.cn,direct \
    GO111MODULE=on \
    GOARCH=amd64 \
    CGO_ENABLED=0
WORKDIR /go/src/memorandum
# Copy the module files first so the dependency download is cached as long as
# go.mod and go.sum do not change.
COPY go.mod .
COPY go.sum .
RUN go mod download
COPY . .
RUN go build -o memorandum .

# Deploy stage: an empty image holding only the binary and its config file.
FROM scratch AS deploy
COPY --from=build /go/src/memorandum/memorandum .
COPY --from=build /go/src/memorandum/config.yaml .
CMD ["./memorandum"]

一行一行解释
先是FROM golang:alpine as build,引入一个镜像,镜像名是golang,标签(tag)是alpine,build是这个构建阶段的别名
这里我对这个镜像的理解是创造了一个配好go编译环境的沙箱环境(应该也是linux系统的情况,毕竟alpine就是一个linux os)
然后LABEL authors="taosu",这个没啥好说的,就是添加作者名字,写不写都行
然后ENV 配置环境变量,这个也没啥(有个注意的点是,dockerfile里每条指令都会给镜像多加一层(layer),所以这里设置环境变量不是写多个ENV,而是用\来换行,这样这些环境变量本质上还是同一条指令)
WORKDIR
这个是在镜像中自己设置的目录结构,这里就相当于在要生成的镜像中创建了一个go目录,又在go目录中创建了src目录,然后又在src目录下创建了memorandum目录
COPY
是把当前(编写dockerfile的目录)下的go.mod和go.sum复制到/go/src/memorandum下,最后的.代表着当前目录
RUN
就是在命令行中执行go mod download,这一步的目的是把当前需要的依赖都下载下来,保存到镜像中(go mod tidy应该也行?)
COPY . .
是把当前目录所有文件及子目录都复制到镜像里
RUN
没啥好说的,编译文件,记得后面加个.
然后是新的 FROM ,一个dockerfile可以写很多个from,但是最后生成的镜像是最后一个from进去的,但是这反而有个好处,比如前面golang环境,只要编译完程序之后,其实镜像就不再需要go环境了,这样只需要把编译生成的可执行文件放进新的镜像直接运行就可以了,很省空间
scratch是docker官方提供的一个特殊的镜像(空白镜像,里面什么都没有)
COPY
把上一个镜像生成的可执行文件和配置文件复制进新的镜像
CMD
运行可执行文件

如果直接运行的话就是docker build -t memorandum .
记得加上最后的 .

其实这里一开始还想加上mysql的,但是alpine当os下起来怪麻烦的,而且报错还看不了(也可能是我不会看,反正很难搞),用经常用的os比如ubuntu那种,又太臃肿了,这里本来都打算放弃在docker里配mysql了,最后发现了另一个好东西——docker-compose.yaml

docker-compose.yaml

参考 https://juejin.cn/post/7273024681632333885

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
version: '3'

# User-defined bridge network shared by both services.
networks:
  app-network:
    driver: bridge

services:
  # Go backend, built from the Dockerfile in the current directory.
  go-server:
    build: .
    ports:
      - "6789:6789"
    depends_on:
      - mysql
    networks:
      - app-network

  # MySQL database; reachable from go-server as host "mysql".
  mysql:
    container_name: mysql
    image: mysql
    environment:
      - MYSQL_DATABASE=memorandum
      - MYSQL_USER=taosu
      - MYSQL_PASSWORD=0216
      - MYSQL_ROOT_PASSWORD=root
    networks:
      - app-network

第一个version是docker-compose.yaml的一个版本,现在3是最新的

networks是关键字,定义了docker的网络
app-network是自定义的网络名字(这里是为了让go后端服务能访问mysql)
driver是关键字,选择桥接

services也是关键字,表明要创建的服务
go-server是自定义的名字,表示一个要构建的容器
build是关键字, .是代表当前路径,整个意思就是build在当前目录下寻找dockerfile并进行build
ports是关键字,这个docker用的多了看着就是老熟人了,就是一个端口映射的东西,第一个6789代表要在宿主机进行映射的端口,第二个代表容器开放的端口,就是把容器的6789映射到宿主机的6789端口,外界访问宿主机ip:6789时实际是在访问容器ip:6789
depends_on是关键字,意思是要先启动mysql这个容器,才能启动go-server这个容器

mysql
container_name是容器别名(这个mysql也是自定义的),能起到一个类似dns的效果,比如go-server这个容器要访问mysql数据库时,就不再是127.0.0.1:3306了,而是mysql:3306,这里会把mysql自动解析成mysql这个容器对应的ip(所以config.yaml中的host也不再是127.0.0.1,而是mysql了)
image,关键字,表明这个容器所引用的镜像是哪个,这里是直接mysql,但mysql后面也可以加版本号,比如mysql:latest,mysql:8.0这样的
environment,关键字,注意environment的写法是key=value而不是:,在写的时候没注意这个错了好多次
创建一个数据库,名字为memorandum
建立一个普通用户,名字为taosu
密码为0216
root用户的密码为root

完成后的cli命令
docker-compose -f docker-compose.yml up -d
-f是指定docker-compose.yml的路径
up是开始构建(类似run的操作?)
-d是指完成后在后台运行(不占用终端)


go查漏补缺2.0
https://blog.yblue.top/2023/11/13/go查漏补缺2-0/
Posted on
November 13, 2023
Licensed under