File size: 7,416 Bytes
7107f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
package lanzou

import (
	"bytes"
	"fmt"
	"net/http"
	"regexp"
	"strconv"
	"strings"
	"time"
	"unicode"

	log "github.com/sirupsen/logrus"
)

// DAY is the duration of one day (24 hours). It is used when converting
// relative timestamps such as "N days ago" into absolute times.
//
// It was previously written as 84600000000000 ns (23.5 hours), a digit
// transposition of 86400 seconds.
const DAY time.Duration = 24 * time.Hour

// timeSplitReg splits a relative time string into an optional leading number
// and a run of CJK characters (the unit / keyword).
var timeSplitReg = regexp.MustCompile("([0-9.]*)\\s*([\u4e00-\u9fa5]+)")

// MustParseTime converts a time string into a time.Time and never fails.
//
// The string is first parsed as a "2006-01-02" date with a +08 zone offset
// appended. If that fails, it is treated as a relative expression made of a
// number followed by one of the keywords "秒前" (seconds ago), "分钟前"
// (minutes ago), "小时前" (hours ago), "天前" (days ago), or one of the bare
// keywords "昨天" (yesterday) and "前天" (the day before yesterday). Day-based
// offsets are multiples of the package constant DAY.
//
// When nothing can be recognised the current time is returned.
func MustParseTime(str string) time.Time {
	if parsed, err := time.ParseInLocation("2006-01-02 -07", str+" +08", time.Local); err == nil {
		return parsed
	}

	parts := timeSplitReg.FindStringSubmatch(str)
	now := time.Now()
	if len(parts) != 3 {
		return now
	}

	// A missing or malformed number leaves the count as parsed by ParseInt
	// (0 for syntax errors), which is the intended best-effort fallback.
	count, _ := strconv.ParseInt(parts[1], 10, 64)
	ago := time.Duration(-count)

	switch parts[2] {
	case "秒前":
		return now.Add(time.Second * ago)
	case "分钟前":
		return now.Add(time.Minute * ago)
	case "小时前":
		return now.Add(time.Hour * ago)
	case "天前":
		return now.Add(DAY * ago)
	case "昨天":
		return now.Add(-DAY)
	case "前天":
		return now.Add(-DAY * 2)
	}
	return now
}

// sizeSplitReg splits a human-readable size into its numeric part and its
// unit letters (case-insensitive).
var sizeSplitReg = regexp.MustCompile(`(?i)([0-9.]+)\s*([bkmg]+)`)

// SizeStrToInt64 converts a human-readable size such as "1.5 M", "300 K",
// "12 B" or "2 MB" into a number of bytes.
//
// Units are case-insensitive and use powers of 1024. Both the short form
// ("K", "M", "G") and the form with a trailing "B" ("KB", "MB", "GB") are
// accepted. It returns 0 when the string cannot be parsed or the unit is
// not recognised.
func SizeStrToInt64(size string) int64 {
	strs := sizeSplitReg.FindStringSubmatch(size)
	if len(strs) < 3 {
		return 0
	}

	s, err := strconv.ParseFloat(strs[1], 64)
	if err != nil {
		// e.g. "1.2.3" or "." - the regex allows them but they are not numbers.
		return 0
	}

	switch strings.ToUpper(strs[2]) {
	case "B":
		return int64(s)
	case "K", "KB":
		return int64(s * (1 << 10))
	case "M", "MB":
		return int64(s * (1 << 20))
	case "G", "GB":
		return int64(s * (1 << 30))
	}
	return 0
}

// removeNotesReg matches, on a single line, an HTML comment, a "//" line
// comment together with the one character preceding it (which must not be
// ':' so that "http://" style URLs are left alone), or a "/* ... */" comment.
var removeNotesReg = regexp.MustCompile(`<!--.*?-->|[^:]//.*|/\*.*?\*/`)

// RemoveNotes strips comments from an HTML/JavaScript fragment.
//
// HTML comments and "/* ... */" comments are replaced by a newline. A "//"
// line comment is removed while the character in front of it is kept.
// Because '.' does not match a newline here, only comments that start and
// end on the same line are affected, and a "//" comment at the very start of
// the input (with no preceding character) is not matched.
func RemoveNotes(html string) string {
	return removeNotesReg.ReplaceAllStringFunc(html, func(m string) string {
		// Byte width of the first rune of the match; the character in front
		// of "//" may be multi-byte, so a fixed offset of 1 is not enough.
		width := len(m)
		for i := range m {
			if i > 0 {
				width = i
				break
			}
		}
		if len(m) >= width+2 && m[width:width+2] == "//" {
			return m[:width]
		}
		return "\n"
	})
}

// findAcwScV2Reg extracts the value assigned to arg1 in the challenge page.
var findAcwScV2Reg = regexp.MustCompile(`arg1='([0-9A-Z]+)'`)

// CalcAcwScV2 computes the acw_sc__v2 value from a challenge page.
//
// When a page is requested too often (or in some other situations) the
// server may first answer with an obfuscated page whose script computes an
// acw_sc__v2 value; the real page is only served once that value is sent
// back. This function reproduces the computation: it reads arg1 from the
// page, reorders it with Unbox and XORs it with a fixed key via HexXor.
// The caller is presumably expected to set the result as the acw_sc__v2
// cookie and retry the request.
//
// An error is returned when arg1 cannot be found in html.
func CalcAcwScV2(html string) (string, error) {
	const xorKey = "3000176000856006061501533003690027800375"

	log.Debugln("acw_sc__v2", html)

	match := findAcwScV2Reg.FindStringSubmatch(html)
	if len(match) != 2 {
		return "", fmt.Errorf("无法匹配acw_sc__v2")
	}
	arg1 := match[1]
	return HexXor(Unbox(arg1), xorKey), nil
}

// Unbox reorders the bytes of hex according to a fixed 40-entry permutation
// table: the byte at input position i is written to output position box[i].
//
// The result has the same length as the input. Input bytes beyond the table
// length are ignored, and output positions that are never written (because
// the input is shorter or longer than 40 bytes) stay zero bytes. Inputs of
// any length, including the empty string, are handled without panicking.
func Unbox(hex string) string {
	box := [...]int{6, 28, 34, 31, 33, 18, 30, 23, 9, 8, 19, 38, 17, 24, 0, 5, 32, 21, 10, 22, 25, 14, 15, 3, 16, 27, 13, 35, 2, 29, 11, 26, 4, 36, 1, 39, 37, 7, 20, 12}
	out := make([]byte, len(hex))
	// Bound the loop by the input as well as the table so hex[i] is always
	// in range for short inputs.
	for i := 0; i < len(box) && i < len(hex); i++ {
		if j := box[i]; j < len(out) {
			out[j] = hex[i]
		}
	}
	return string(out)
}

// HexXor XORs two hexadecimal strings byte by byte and returns the result as
// a lowercase hexadecimal string.
//
// The inputs are consumed two hex digits (one byte) at a time, up to the
// length of the shorter input; a trailing unpaired digit is ignored. Every
// resulting byte is written as exactly two digits, so a result below 0x10 is
// zero-padded. A pair that is not valid hexadecimal is treated as 0.
func HexXor(hex1, hex2 string) string {
	var out bytes.Buffer
	out.Grow(len(hex1))
	for i := 0; i+2 <= len(hex1) && i+2 <= len(hex2); i += 2 {
		// Errors are deliberately ignored: the function has no error return
		// and ParseUint yields 0 for a malformed pair.
		v1, _ := strconv.ParseUint(hex1[i:i+2], 16, 8)
		v2, _ := strconv.ParseUint(hex2[i:i+2], 16, 8)
		x := v1 ^ v2
		if x < 0x10 {
			out.WriteByte('0')
		}
		out.WriteString(strconv.FormatUint(x, 16))
	}
	return out.String()
}

var (
	// findDataReg locates a "data: { ... }" object literal and captures the
	// braces together with their content (up to the first closing brace).
	findDataReg = regexp.MustCompile(`data[:\s]+({[^}]+})`)

	// findKVReg splits such an object literal into entries. Group 1 is the
	// single-quoted key, group 2 the raw value including optional quotes and
	// group 3 the value without quotes.
	findKVReg = regexp.MustCompile(`'(.+?)':('?([^' },]*)'?)`)
)

// findJSVarFunc looks up the value assigned to the JavaScript variable key
// in data, matching declarations of the form `var key = value;` where the
// value may optionally be wrapped in single or double quotes.
//
// For every key except "sasign" the first declaration wins. For "sasign",
// when exactly three declarations are present the second one is used;
// otherwise the first one is used.
//
// It returns "" when no declaration is found. key is matched literally, so
// keys containing regular-expression metacharacters neither panic nor act
// as patterns.
func findJSVarFunc(key, data string) string {
	varReg := regexp.MustCompile(`var ` + regexp.QuoteMeta(key) + `\s*=\s*['"]?(.+?)['"]?;`)

	if key != "sasign" {
		if m := varReg.FindStringSubmatch(data); len(m) > 1 {
			return m[1]
		}
		return ""
	}

	matches := varReg.FindAllStringSubmatch(data, -1)
	switch {
	case len(matches) == 3:
		return matches[1][1]
	case len(matches) > 0:
		return matches[0][1]
	}
	return ""
}

// findFunction matches the header of a function that starts at the beginning
// of a line (everything from "function" up to, but excluding, the first '{').
var findFunction = regexp.MustCompile(`(?ims)^function[^{]+`)

// findFunctionAll matches a function header anywhere in the text.
var findFunctionAll = regexp.MustCompile(`(?is)function[^{]+`)

// findJSFunctionIndex returns the byte ranges [start, end) of the functions
// found in data.
//
// With all == false only functions whose "function" keyword starts a line
// are considered; with all == true every occurrence is considered. For each
// header the text that follows is scanned while counting '{' and '}' until
// the count returns to zero, which marks the end of the function. Braces are
// counted purely textually. A header whose braces never balance produces no
// entry.
func findJSFunctionIndex(data string, all bool) [][2]int {
	headerReg := findFunction
	if all {
		headerReg = findFunctionAll
	}

	headers := headerReg.FindAllStringIndex(data, -1)
	spans := make([][2]int, 0, len(headers))

	for _, header := range headers {
		if len(header) != 2 {
			continue
		}
		start, bodyStart := header[0], header[1]
		depth := 0
		for offset, r := range data[bodyStart:] {
			switch {
			case r == ' ' && depth == 0:
				// Skip blanks in front of the opening brace.
				continue
			case r == '{':
				depth++
			case r == '}':
				depth--
			}
			if depth == 0 {
				spans = append(spans, [2]int{start, bodyStart + offset + 1})
				break
			}
		}
	}
	return spans
}

// removeJSGlobalFunction removes the functions that start at the beginning
// of a line from html and returns the remaining text.
//
// The result is the concatenation of the text that follows each such
// function, up to the start of the next one (or the end of html for the
// last). Text located before the first function is not part of the result,
// and an input without any such function yields "".
func removeJSGlobalFunction(html string) string {
	spans := findJSFunctionIndex(html, false)

	var kept strings.Builder
	for i, span := range spans {
		// The gap after this function ends where the next function begins.
		end := len(html)
		if i+1 < len(spans) {
			end = spans[i+1][0]
		}
		kept.WriteString(html[span[1]:end])
	}
	return kept.String()
}

// getJSFunctionByName returns the source text of the first function found in
// html whose text contains a declaration `function <name>(...) {` for the
// given name.
//
// All functions located by findJSFunctionIndex (anywhere in the text) are
// examined in order. name is matched literally. An error is returned when
// no function matches.
func getJSFunctionByName(html string, name string) (string, error) {
	// Compile once per call instead of once per candidate, and quote the name
	// so identifiers containing characters such as '$' match as written.
	headerReg := regexp.MustCompile(`function\s+` + regexp.QuoteMeta(name) + `[()\s]+{`)

	for _, index := range findJSFunctionIndex(html, true) {
		body := html[index[0]:index[1]]
		if headerReg.MatchString(body) {
			return body, nil
		}
	}
	return "", fmt.Errorf("not find %s function", name)
}

// htmlJsonToMap2 parses a "data: { ... }" object literal embedded in html
// into a map. When several literals are present the longest one is used
// (the first of them on ties).
//
// An error is returned when html contains no such literal.
func htmlJsonToMap2(html string) (map[string]string, error) {
	longest := ""
	for _, match := range findDataReg.FindAllStringSubmatch(html, -1) {
		if candidate := match[1]; len(candidate) > len(longest) {
			longest = candidate
		}
	}
	if longest == "" {
		return nil, fmt.Errorf("not find data")
	}
	return jsonToMap(longest, html), nil
}

// htmlJsonToMap parses the first "data: { ... }" object literal embedded in
// html into a map.
//
// An error is returned when html contains no such literal.
func htmlJsonToMap(html string) (map[string]string, error) {
	if match := findDataReg.FindStringSubmatch(html); len(match) == 2 {
		return jsonToMap(match[1], html), nil
	}
	return nil, fmt.Errorf("not find data")
}

// jsonToMap converts the entries of the object literal data into a map.
//
// Each entry matched by findKVReg contributes one key. A value that is
// empty, contains a single quote in its raw form, or consists only of digits
// is stored as written (without quotes). Any other value is treated as the
// name of a JavaScript variable and is resolved against html with
// findJSVarFunc.
func jsonToMap(data, html string) map[string]string {
	result := make(map[string]string)
	for _, entry := range findKVReg.FindAllStringSubmatch(data, -1) {
		key, raw, value := entry[1], entry[2], entry[3]

		isLiteral := value == "" || strings.Contains(raw, "'") || IsNumber(raw)
		if !isLiteral {
			// Bare identifier: look up the variable it refers to.
			value = findJSVarFunc(value, html)
		}
		result[key] = value
	}
	return result
}

// IsNumber reports whether every rune of str is a decimal digit as defined
// by unicode.IsDigit. The empty string yields true; signs, decimal points
// and spaces yield false.
func IsNumber(str string) bool {
	for _, r := range str {
		if unicode.IsDigit(r) {
			continue
		}
		return false
	}
	return true
}

// findFromReg captures the single-quoted form string passed as
// "data : '...'" in the page.
var findFromReg = regexp.MustCompile(`data : '(.+?)'`)

// htmlFormToMap extracts the first "data : '...'" form string from html and
// parses it into a map with formToMap.
//
// An error is returned when html contains no such string.
func htmlFormToMap(html string) (map[string]string, error) {
	if match := findFromReg.FindStringSubmatch(html); len(match) == 2 {
		return formToMap(match[1]), nil
	}
	return nil, fmt.Errorf("not find file sgin")
}

// formToMap parses a form-encoded string of the shape "k1=v1&k2=v2" into a
// map. No URL-unescaping is performed.
//
// Each "&"-separated segment is split at its first "="; a segment without
// "=" is stored with an empty value. Empty segments (as produced by an empty
// input, a trailing "&" or "&&") are skipped, so the function never panics.
func formToMap(from string) map[string]string {
	param := make(map[string]string)
	for _, pair := range strings.Split(from, "&") {
		if pair == "" {
			continue
		}
		kv := strings.SplitN(pair, "=", 2)
		value := ""
		if len(kv) == 2 {
			value = kv[1]
		}
		param[kv[0]] = value
	}
	return param
}

// regExpirationTime captures the digits following the first "e=" in a URL.
var regExpirationTime = regexp.MustCompile(`e=(\d+)`)

// GetExpirationTime returns how long remains until the moment encoded in the
// "e=<digits>" part of url, where the digits are read as a Unix timestamp in
// seconds. The result is negative when that moment is already in the past.
//
// It returns 0 when url has no such part or the number does not fit in an
// int64.
func GetExpirationTime(url string) (etime time.Duration) {
	match := regExpirationTime.FindStringSubmatch(url)
	if len(match) < 2 {
		return 0
	}

	expireAt, err := strconv.ParseInt(match[1], 10, 64)
	if err != nil {
		return 0
	}

	remaining := expireAt - time.Now().Unix()
	return time.Duration(remaining) * time.Second
}

// CookieToString renders cookies as "name=value" pairs joined by ";" (with
// no space after the separator). A nil or empty slice yields "".
func CookieToString(cookies []*http.Cookie) string {
	var sb strings.Builder
	for i, cookie := range cookies {
		if i > 0 {
			sb.WriteString(";")
		}
		sb.WriteString(cookie.Name + "=" + cookie.Value)
	}
	return sb.String()
}