aboutsummaryrefslogtreecommitdiff
path: root/init.js
blob: ac455223c05f8ba699d419200a6e3fdfa4c8f111 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
const fetch = require("node-fetch")
const cheerio = require("cheerio")
const jsonic = require("jsonic")

// Text wrapped around the init data literal inside Google's inline scripts.
const INIT_CALLBACK_PREFIX = "AF_initDataCallback("
const INIT_CALLBACK_SUFFIX = ");"

/**
 * Turn one raw entry of the init data into a readable result object.
 *
 * The image and preview tuples are read as [url, height, width].
 *
 * @param {Array} elem - raw entry from the init data result list
 * @returns {object} the readable result; a falsy value is returned unchanged when
 *                   the entry carries no image data at index 1 (callers filter those out)
 */
const toImageResult = elem => {
	const info = elem[1]
	if (!info)
		return info

	const image = info[3]
	const preview = info[2]
	// Tolerate entries without page metadata instead of failing the whole search.
	const page = info[9]?.[2003] // there is some more data in info[9] that could potentially be useful

	return {
		image: {
			url: image[0],
			size: {
				width: image[2],
				height: image[1],
			},
		},
		preview: {
			url: preview[0],
			size: {
				width: preview[2],
				height: preview[1],
			},
		},
		color: info[6], // probably average color of the image (used as placeholder while loading the image)
		link: page?.[2],
		title: page?.[3],
	}
}

/**
 * Search Google Images and resolve to a list of readable results.
 *
 * The results are not scraped from the rendered markup; they are read from the init
 * data that an inline script passes to AF_initDataCallback under the key "ds:1".
 *
 * @param {string} query - search terms; URI-encoded before being appended to the URL
 * @param {string} [userAgent] - value sent as the User-Agent request header
 * @returns {Promise<Array<object>>} results shaped as
 *          {image: {url, size: {width, height}}, preview: {url, size: {width, height}}, color, link, title};
 *          link and title are undefined for entries without page metadata
 * @throws {Error} (as a rejection) when the response status is not OK or when the
 *         result list cannot be located in the init data
 */
module.exports.search = async (query, userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") => {
	const res = await fetch("https://www.google.com/search?tbm=isch&q=" + encodeURIComponent(query), {headers: {"User-Agent": userAgent}})
	if (!res.ok)
		throw new Error(`Google image search request failed with HTTP status ${res.status}`)

	const html = await res.text()
	const $ = cheerio.load(html, null, false) // parse HTML

	const initData = $("script")                                                 // find script tags
		.toArray()                                                               // convert cheerio list to array
		.map(script => script.children[0]?.data)                                 // map script tags to their inline code
		.filter(code => code?.startsWith("AF_initDataCallback"))                 // keep scripts that contain init data
		.map(code => code.slice(INIT_CALLBACK_PREFIX.length, -INIT_CALLBACK_SUFFIX.length)) // remove call to init function
		// jsonic is used because JSON.parse() requires strict JSON and eval() allows remote code execution.
		// It is called with a single argument so that map's index/array arguments are not forwarded to it.
		.map(literal => jsonic(literal))
		.find(data => data.key === "ds:1")                                       // there are two init datas, one of which is empty

	// Fail with a readable message instead of an opaque TypeError when the layout is not as expected.
	const entries = initData?.data?.[31]?.[0]?.[12]?.[2]
	if (!Array.isArray(entries))
		throw new Error("Could not locate the image results in Google's init data (the page format may have changed)")

	return entries
		.map(elem => toImageResult(elem)) // map the parts of the init data we know/care about to something readable
		.filter(Boolean)                  // drop entries that carried no image data
}

/**
 * Run an image search and resolve to a single pseudo-randomly chosen result.
 *
 * Both arguments are handed to module.exports.search unchanged. The chosen index is
 * derived from Math.random(), so an empty result list resolves to undefined.
 *
 * @param {string} query - search terms, forwarded to search
 * @param {string} [userAgent] - User-Agent header value, forwarded to search
 * @returns {Promise<object|undefined>} one element of the result list
 */
module.exports.searchRandom = (query, userAgent) => {
	const pickOne = results => {
		const index = Math.floor(Math.random() * results.length)
		return results[index]
	}

	return module.exports.search(query, userAgent).then(pickOne)
}

/*

In case Google changes its page format, here are some snippets that were used to reverse-engineer the current one:

1. Find which script contains the init data (use the query astolfo+images for this)
-----------------------------------------------------------------------------------

	scripts.find(script => script.search("https://steamcdn-a.akamaihd.net/steamcommunity/public/images/items/622220/f4d2d4074167411a7e15b9a845cf18b434c02af3.jpg") >= 0)

2. Reverse engineer format of init data passed to AF_initDataCallback
---------------------------------------------------------------------

const findStrings = (obj, path = "") => {
	let found = []

	for (k in obj) {
		let v = obj[k]
		let t = typeof v
		let p = path + "." + k

		if (t == "object")
			found = found.concat(findStrings(v, p))
		else if (t == "string")
			found.push([v, p])
	}

	return found
}

	console.log(findStrings(initData))

*/