about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Elias Fleckenstein <eliasfleckenstein@web.de> 2022-03-05 10:37:21 +0100
committer: Elias Fleckenstein <eliasfleckenstein@web.de> 2022-03-05 10:37:21 +0100
commit: c2b110c40aadd2790eb7e04b03c7c9fd879b3616 (patch)
tree: b68a50f32aacf498c7cb042739765124fcf487ee
parent: 93f0e0569bd595c5472aad2607041f6829ce75cf (diff)
download: google_images-c2b110c40aadd2790eb7e04b03c7c9fd879b3616.tar.xz
Fix script selection
-rw-r--r-- init.js 55
-rw-r--r-- package-lock.json 2
-rw-r--r-- package.json 2
3 files changed, 32 insertions, 27 deletions
diff --git a/init.js b/init.js
index fd566ad..cede7a1 100644
--- a/init.js
+++ b/init.js
@@ -2,36 +2,41 @@ const fetch = require("node-fetch")
const cheerio = require("cheerio")
const jsonic = require("jsonic")
+const debug = arg => {
+ console.log(arg)
+ return arg
+}
+
module.exports.search = (query, userAgent = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:97.0) Gecko/20100101 Firefox/97.0") =>
fetch("https://www.google.com/search?tbm=isch&q=" + encodeURIComponent(query), {headers: {"User-Agent": userAgent}}).then(res => res.text()).then(data =>
- jsonic( // jsonic is used because JSON.parse() requires strict JSON and eval() allows for remote code execution
- cheerio.load(data, null, false) // parse HTML
- ("script") // find script tags
- .toArray() // convert cheerio list to array
- .map(script => script.children[0]?.data) // map script tags to their inline code
- .find(script => script?.startsWith("AF_initDataCallback")) // find script that contains init data
- .slice("AF_initDataCallback(".length, -");".length) // remove call to init function
- ).data[31][0][12][2].map(elem => new Object({ // map the parts of the init data we know/care about to something readable
- image: {
- url: elem[1][3][0],
- size: {
- width: elem[1][3][2],
- height: elem[1][3][1],
+ cheerio.load(data, null, false) // parse HTML
+ ("script") // find script tags
+ .toArray() // convert cheerio list to array
+ .map(script => script.children[0]?.data) // map script tags to their inline code
+ .filter(script => script?.startsWith("AF_initDataCallback")) // find script that contains init data
+ .map(script => script.slice("AF_initDataCallback(".length, -");".length)) // remove call to init function
+ .map(jsonic) // jsonic is used because JSON.parse() requires strict JSON and eval() allows remote code execution
+ .find(data => data.key == "ds:1") // for some reason there are two init datas, one is empty tho
+ .data[31][0][12][2].map(elem => new Object({ // map the parts of the init data we know/care about to something readable
+ image: {
+ url: elem[1][3][0],
+ size: {
+ width: elem[1][3][2],
+ height: elem[1][3][1],
+ },
},
- },
- preview: {
- url: elem[1][2][0],
- size: {
- width: elem[1][2][2],
- height: elem[1][2][1],
+ preview: {
+ url: elem[1][2][0],
+ size: {
+ width: elem[1][2][2],
+ height: elem[1][2][1],
+ },
},
- },
- color: elem[1][6], // average color of the image, probably (used as placeholder while loading the image)
- link: elem[1][9][2003][2],
- title: elem[1][9][2003][3], // there is some more data in elem[1][9] that could potentially be useful
- }))
+ color: elem[1][6], // probably average color of the image (used as placeholder while loading the image)
+ link: elem[1][9][2003][2],
+ title: elem[1][9][2003][3], // there is some more data in elem[1][9] that could potentially be useful
+ }))
)
-
/*
In case google makes changes, here are some snippets used to reverse engineer the format:
diff --git a/package-lock.json b/package-lock.json
index 7af8f43..2acc47d 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,6 +1,6 @@
{
"name": "google_images",
- "version": "1.0.1",
+ "version": "1.0.2",
"lockfileVersion": 2,
"requires": true,
"packages": {
diff --git a/package.json b/package.json
index 2a702be..e5a021f 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "free-google-images",
- "version": "1.0.1",
+ "version": "1.0.2",
"description": "Reverse Engineered Google Image Search API",
"main": "init.js",
"scripts": {