// database.js
// - 📅 2021-07-28T11:43:43.000Z
// - 👁️ 312 views
// - 🔓 Public
const got = require('got');
const sqlite3 = require('sqlite3').verbose();
const pages = 5729
let db = new sqlite3.Database('./images.db');
/*
TODO:
- check when image has been taken
- check municipality
*/
let sql = `
CREATE TABLE IF NOT EXISTS Vehicle (
id INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
big_path TEXT,
thumb_path TEXT,
municipality TEXT,
taken DATE,
name TEXT
);
`;
db.run(sql, [], (err) => {
if (err) throw err
});
db.close();
const indexImage = async (bigImagePath, thumbImagePath, name) => {
let db = new sqlite3.Database('./images.db');
let sql = `INSERT INTO Vehicle (big_path, thumb_path, name) VALUES (?, ?, ?);`;
db.run(sql, [bigImagePath, thumbImagePath, name], function(err) {
if (err) throw err
console.log(`Indexed image: ${name}`)
});
db.close();
}
const scrapeImagesFromPage = async (id) => {
try {
console.log(`Scraping page ${id}`)
const regexpLink = /<img src="(albums\/userpics\/\w+\/\w+\.jpg)"/g;
const regexpTitle = /<span class="thumb_title thumb_title_title">(.*)<\/span><span class="thumb_title thumb_title_views">/g
const response = await got(`https://www.fireimages.net/thumbnails.php?album=lastup&page=${id}`);
const images = [...response.body.matchAll(regexpLink)]
const titles = [...response.body.matchAll(regexpTitle)]
images.forEach((image, idx) => {
const name = titles[idx][1]
const bigImagePath = `https://www.fireimages.net/${image[1].replace('thumb_', '')}`
const thumbImagePath = `https://www.fireimages.net/${image[1]}`
indexImage(bigImagePath, thumbImagePath, name)
});
} catch (error) {
console.error(error);
}
}
let timeoutMs = 0;
for (let page_id = 1; page_id <= pages; page_id++) {
setTimeout(() => { scrapeImagesFromPage(page_id)}, timeoutMs += 1000);
}