// Constants
const CONFIG = {
USER_AGENT: "Googlebot/2.1 (+http://www.google.com/bot.html)",
RECIPIENT_EMAIL: "tom.spaccialbelli@gmail.com",
PRODUCT_BLOCK_REGEX: /
]*data-sku[^>]*>([\s\S]*?)(?=]*data-sku)/g,
LINK_REGEX: /(\d+)/g,
SEE_MORE_PRICE_REGEX: /class="hideFromPro lpPictosMsg"[^>]*>\+ d'offres à partir de (\d+),\d+€/g,
SHEETS: {
CAT: "CAT",
PRODUCTS: "PRODUCTS",
COUNT: "COUNT"
},
RANGES: {
CAT_URLS: "B2:B",
PRODUCTS_DATA: { startRow: 2, startCol: 1, numCols: 2 },
COUNT_DATA: { startRow: 2, startCol: 1, numCols: 2 }
},
BOT_DETECTION_TEXT: "Comment activer le javascript",
PRICE_CHANGE_THRESHOLD: 0.4,
BASE_URL: "https://www.cdiscount.com/ProductListUC.mvc/UpdateJsonPage?page=",
POST_HEADER : {
"accept": "application/json, text/javascript, */*; q=0.01",
"accept-language": "en-US,en;q=0.5",
"cache-control": "no-cache",
"content-type": "application/x-www-form-urlencoded; charset=UTF-8",
"pragma": "no-cache",
"user-agent": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
"sec-ch-ua": "\"Googlebot\";v=\"2.1\", \"(compatible; Googlebot/2.1; +http://www.google.com/bot.html)\"",
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": "\"Linux\"",
"sec-fetch-dest": "empty",
"sec-fetch-mode": "cors",
"sec-fetch-site": "same-origin",
"sec-gpc": "1",
"x-requested-with": "XMLHttpRequest"
},
};
function getCountData(countSheet) {
const { startRow, startCol, numCols } = CONFIG.RANGES.COUNT_DATA;
const lastRow = countSheet.getLastRow();
if (lastRow >= startRow) {
const numRows = lastRow - startRow + 1;
return countSheet.getRange(startRow, startCol, numRows, numCols).getValues();
} else {
return [];
}
}
function updateCountData(countSheet, requestCount) {
const today = new Date().toLocaleDateString();
const countData = getCountData(countSheet);
let rowIndex = countData.findIndex(data => {
const dateStr = data[0].toLocaleDateString();
return dateStr === today;
});
if (rowIndex === -1) {
// If the date doesn't exist, add a new row with the date and request count
rowIndex = countData.length;
countSheet.getRange(CONFIG.RANGES.COUNT_DATA.startRow + rowIndex, CONFIG.RANGES.COUNT_DATA.startCol).setValue(today);
countSheet.getRange(CONFIG.RANGES.COUNT_DATA.startRow + rowIndex, CONFIG.RANGES.COUNT_DATA.startCol + 1).setValue(requestCount);
} else {
// If the date already exists, update the request count in the existing row
const existingCount = countData[rowIndex][1];
countSheet.getRange(CONFIG.RANGES.COUNT_DATA.startRow + rowIndex, CONFIG.RANGES.COUNT_DATA.startCol + 1).setValue(existingCount + requestCount);
}
}
// Utility function to parse price from string to integer
function parsePrice(priceString) {
return priceString ? parseInt(priceString, 10) : null;
}
// Utility function to log price change
function logPriceChange(url, previousPrice, newPrice) {
Logger.log(`Price change detected for ${url}`);
Logger.log(`Previous price: ${previousPrice}, New price: ${newPrice}`);
}
function checkBotDetection(responseText) {
return responseText.includes(CONFIG.BOT_DETECTION_TEXT);
}
function parseProductData(productBlockMatch) {
const linkMatches = productBlockMatch.match(CONFIG.LINK_REGEX) || [];
const priceMatches = productBlockMatch.match(CONFIG.PRICE_REGEX) || [];
if (linkMatches.length === 0 || priceMatches.length === 0) {
return null;
}
const productUrl = linkMatches[0].match(/href="([^"]*)"/)[1];
const price = parsePrice(priceMatches[0].split('>')[1]);
const seeMorePriceMatch = productBlockMatch.match(CONFIG.SEE_MORE_PRICE_REGEX);
const seeMorePrice = seeMorePriceMatch ? parsePrice(seeMorePriceMatch[0].match(/(\d+),\d+/)) : null;
const lowestPrice = seeMorePrice !== null && seeMorePrice < price ? seeMorePrice : price;
return { productUrl, lowestPrice };
}
function comparePrice(previousPrice, lowestPrice) {
if (!isNaN(previousPrice) && !isNaN(lowestPrice) && lowestPrice !== null && lowestPrice !== "" && lowestPrice < previousPrice * CONFIG.PRICE_CHANGE_THRESHOLD) {
return true;
}
return false;
}
function processProductData(productBlockMatches, existingData) {
const updatedProductData = [];
const newProductData = [];
for (const productBlockMatch of productBlockMatches) {
if (productBlockMatch.includes("{{nodeid}}") || productBlockMatch.includes("sponsorisés")) {
continue;
}
const productData = parseProductData(productBlockMatch);
if (productData) {
const { productUrl, lowestPrice } = productData;
const existingProductIndex = existingData.findIndex(data => data[0] === productUrl);
if (existingProductIndex !== -1) {
const previousPrice = parsePrice(existingData[existingProductIndex][1]);
if (comparePrice(previousPrice, lowestPrice)) {
sendEmailNotification('', productUrl, previousPrice, lowestPrice);
updatedProductData.push([productUrl, lowestPrice]);
}
} else {
newProductData.push([productUrl, lowestPrice]);
}
}
}
return { updatedProductData, newProductData };
}
function fetchProductData(responseText, payload, existingData) {
if (checkBotDetection(responseText)) {
return { botBlacklisted: true };
}
try {
const data = JSON.parse(responseText);
const html = data.productsHtml;
const productBlockMatches = html.match(CONFIG.PRODUCT_BLOCK_REGEX) || [];
const { updatedProductData, newProductData } = processProductData(productBlockMatches, existingData);
return { updatedProductData, newProductData, totalResultCount: data.totalResultCount };
} catch (error) {
Logger.log(`Error parsing response: ${error.message}`);
return { updatedProductData: [], newProductData: [], totalResultCount: 0 };
}
}
// Function to get existing product data from the sheet
function getExistingProductData(productsSheet) {
const { startRow, startCol, numCols } = CONFIG.RANGES.PRODUCTS_DATA;
const lastRow = productsSheet.getLastRow();
if (lastRow >= startRow) {
const numRows = lastRow - startRow + 1;
return productsSheet.getRange(startRow, startCol, numRows, numCols).getValues();
} else {
// If there are no rows in the specified range, return an empty array
return [];
}
}
function writeProductData(productsSheet, updatedProductData, newProductData) {
const { startRow, startCol, numCols } = CONFIG.RANGES.PRODUCTS_DATA;
const lastRow = productsSheet.getLastRow();
let existingData = [];
if (lastRow >= startRow) {
const numRows = lastRow - startRow + 1;
existingData = productsSheet.getRange(startRow, startCol, numRows, numCols).getValues();
}
// Update existing product prices
for (const [productUrl, newPrice] of updatedProductData) {
const rowIndex = existingData.findIndex(data => data[0] === productUrl);
if (rowIndex !== -1) {
productsSheet.getRange(startRow + rowIndex, startCol + 1).setValue(newPrice);
}
}
// Append new product data
if (newProductData.length > 0) {
const newDataStartRow = existingData.length + startRow;
const newDataRange = productsSheet.getRange(newDataStartRow, startCol, newProductData.length, numCols);
newDataRange.setValues(newProductData);
}
}
// Main function to scrape Cdiscount pages and write product data to the "PRODUCTS" sheet
function scrapeCdiscountPages() {
const spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
const catSheet = spreadsheet.getSheetByName(CONFIG.SHEETS.CAT);
const productsSheet = spreadsheet.getSheetByName(CONFIG.SHEETS.PRODUCTS);
const countSheet = spreadsheet.getSheetByName(CONFIG.SHEETS.COUNT);
const payloads = catSheet.getRange(CONFIG.RANGES.CAT_URLS).getValues().flat().filter(payload => payload !== "");
const existingData = getExistingProductData(productsSheet);
const updatedProductData = [];
let newProductData = [];
// Initial request to check if the bot is blacklisted
const initialUrl = "https://www.cdiscount.com/electromenager/lavage-sechage/achat-seche-linge/l-1100105.html";
const initialResponse = UrlFetchApp.fetch(initialUrl, {
method: "GET",
headers: {
"User-Agent": "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
},
followRedirects: false
});
if (initialResponse.getContentText().includes(CONFIG.BOT_DETECTION_TEXT)) {
Logger.log("Not the good IP address");
return;
}
const firstPageRequests = payloads.map(payload => ({
url: CONFIG.BASE_URL + 1,
method: "POST",
headers: CONFIG.POST_HEADER,
payload: payload,
followRedirects: false,
validateHttpsCertificates: false,
}));
const firstPageResponses = UrlFetchApp.fetchAll(firstPageRequests);
let totalRequestCount = 0;
for (let i = 0; i < payloads.length; i++) {
const payload = payloads[i];
const { totalResultCount, botBlacklisted } = fetchProductData(firstPageResponses[i].getContentText(), payload, existingData);
if (botBlacklisted) {
Logger.log("Not the good IP address");
return;
}
const totalPages = Math.ceil(totalResultCount / 50);
const pageUrls = [];
for (let page = 1; page <= totalPages; page++) {
const url = CONFIG.BASE_URL + page;
pageUrls.push(url);
}
const requests = pageUrls.map(url => ({
url: url,
method: "POST",
headers: CONFIG.POST_HEADER,
payload: payload,
followRedirects: false,
muteHttpExceptions: true,
validateHttpsCertificates: false,
}));
try {
const responses = UrlFetchApp.fetchAll(requests);
totalRequestCount += responses.length;
for (const response of responses) {
const { updatedProductData: updatedData, newProductData: newData } = fetchProductData(response.getContentText(), payload, existingData);
updatedProductData.push(...updatedData);
newProductData.push(...newData);
}
} catch (error) {
Logger.log(`Error fetching requests: ${error.message}`);
// Continue with the next payload
continue;
}
}
// Remove duplicates from newProductData
newProductData = Array.from(new Set(newProductData.map(JSON.stringify)), JSON.parse);
writeProductData(productsSheet, updatedProductData, newProductData);
updateCountData(countSheet, totalRequestCount);
}
function sendEmailNotification(name, url, previousPrice, newPrice) {
const subject = "Price Change Alert";
let body = `The price of the product has changed.\n\nURL: ${url}\nPrevious Price: ${previousPrice}\nNew Price: ${newPrice}`;
if (name) {
body = `The price of the product "${name}" has changed.\n\nURL: ${url}\nPrevious Price: ${previousPrice}\nNew Price: ${newPrice}`;
}
GmailApp.sendEmail(CONFIG.RECIPIENT_EMAIL, subject, body, {cc: 'valentin.francon@gmail.com'});
}