Backup Google Search Console Data Into MySQL With Python

Share this post

This post is part of the Guide on Python for SEO and the guide on the Google Search Console API

This guide will show you the easiest way to automatically backup your Google Search Console Data in MySQL using Python.

Why Use a MySQL Database for SEO?

As SEOs, a lot of the time, if you work on larger websites, you will run into a storage problem where you can’t load a CSV or Excel file because you have too much data.

MySQL can give you a place to store your data. With MySQL, you can have a place to store your Google Search Console backups.

Also, Google Data Studio has a connector for MySQL (limited to 100k rows, unfortunately). This way you can start automating your SEO reports.

Get Started

Before you can save your Google Search Console data to a MySQL database you need a few things.

Create Your MySQL Database

If you have not created your database already, you need to create one to mirror the Google Search Console Data.

import pymysql.cursors

# Create the MySQL database that will hold the Search Console backups.
# NOTE: pymysql expects `port` to be an int (default 3306); passing port=''
# raises an error, so the argument is simply omitted to use the default.
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='')

# Simulate the CREATE DATABASE function of MySQL.
# IF NOT EXISTS makes the script safe to re-run (the original crashed on
# the second run because gsc_db already existed).
try:
    with connection.cursor() as cursor:
        cursor.execute('CREATE DATABASE IF NOT EXISTS gsc_db')
finally:
    connection.close()


# Create the backup table inside gsc_db.
# https://www.youtube.com/watch?v=YWVx4t8FAwU
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='gsc_db',
                             cursorclass=pymysql.cursors.DictCursor)

try:
    with connection.cursor() as cursor:
        # Position is DECIMAL(5,2): the original DECIMAL(3,2) caps values at
        # 9.99, but Search Console average positions routinely exceed 10
        # (they can reach several hundred), which would make INSERTs fail.
        sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE,
                                                            Page LONGTEXT,
                                                            Query LONGTEXT,
                                                            Clicks INT,
                                                            Impressions INT,
                                                            Ctr DECIMAL(10,2),
                                                            Position DECIMAL(5,2))'''
        cursor.execute(sqlQuery)
finally:
    connection.close()

Create Your Google Search Console API Call Script

If you don’t know how to do this, look at this guide: how to use Google Search Console API with Python.

"""
Step 1: Get your JSON API Key
"""
#https://www.jcchouinard.com/how-to-get-google-search-console-api-keys/ 
# Make sur the you name you JSON api key client_secrets.json 

"""
Step 2: Import Libraries
"""

import pandas as pd
import datetime
import httplib2
from apiclient.discovery import build
from collections import defaultdict
from dateutil import relativedelta
import argparse
from oauth2client import client
from oauth2client import file
from oauth2client import tools

"""
Step 3: Connect to the API
"""
site = 'https://www.yoursite.com'

SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')

CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json' # Path to client_secrets.json file.

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[tools.argparser])
flags = parser.parse_args([])


flow = client.flow_from_clientsecrets(
    CLIENT_SECRETS_PATH, scope=SCOPES,
    message=tools.message_if_missing(CLIENT_SECRETS_PATH))

storage = file.Storage('searchconsole.dat')
credentials = storage.get()

if credentials is None or credentials.invalid:
  credentials = tools.run_flow(flow, storage, flags)
http = credentials.authorize(http=httplib2.Http())

webmasters_service = build('webmasters', 'v3', http=http)

"""
Step 4: Query All Your Search Traffic
"""
#Set Date
end_date = datetime.date.today()-relativedelta.relativedelta(days=3)
start_date = end_date 

# Execute one API request against the Search Console service.
def execute_request(service, property_uri, request):
    """Run a searchanalytics query for `property_uri` and return the raw response dict."""
    analytics = service.searchanalytics()
    query = analytics.query(siteUrl=property_uri, body=request)
    return query.execute()

# Page through the results 25,000 rows at a time (the API maximum per
# request), stopping when a short page comes back or after 40 pages (~1M rows).
maxRows = 25000
i = 0
numRows = maxRows
scDict = defaultdict(list)

while numRows == 25000 and i < 40:  # Limit to 1M rows
    request = {
        'startDate': start_date.strftime('%Y-%m-%d'),
        'endDate': end_date.strftime('%Y-%m-%d'),
        'dimensions': ['date', 'page', 'query'],
        'rowLimit': maxRows,
        'startRow': i * maxRows
    }
    response = execute_request(webmasters_service, site, request)

    # The API omits the 'rows' key entirely when there is no data for the
    # requested window; default to an empty list instead of raising KeyError.
    rows = response.get('rows', [])
    for row in rows:
        scDict['date'].append(row['keys'][0] or 0)
        scDict['page'].append(row['keys'][1] or 0)
        scDict['query'].append(row['keys'][2] or 0)
        scDict['clicks'].append(row['clicks'] or 0)
        scDict['ctr'].append(row['ctr'] or 0)
        scDict['impressions'].append(row['impressions'] or 0)
        scDict['position'].append(row['position'] or 0)

    numRows = len(rows)
    i = i + 1

# Build the dataframe once, after all pages are collected (the original
# rebuilt and re-typed it on every iteration).
df = pd.DataFrame(data=scDict)
if not df.empty:
    df['clicks'] = df['clicks'].astype('int')
    df['ctr'] = df['ctr'] * 100  # store CTR as a percentage
    df['impressions'] = df['impressions'].astype('int')
    df['position'] = df['position'].round(2)
    df.sort_values('clicks', inplace=True, ascending=False)

Save Your API Call to the MySQL Database

Now that we have made the API call, we are going to add the result to the mySQL database.

We will need the sqlalchemy package.

import sqlalchemy

# Open a connection to the gsc_db database (root user, empty password)
# and append today's dataframe to the backup table.
engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')

df.to_sql(name='backup_gsc', con=engine, index=False, if_exists='append')

You can now see in PHPMyAdmin that your data was properly saved into your MySQL database.

Run the Task Every Day With Windows Task Scheduler

Now we want to automate the process by making the API call every day.

To do this, we need to ask Windows Task Scheduler to run the Python code.

Right now, you should have a python file that looks like the code below. I will call it gscbackups.py.

You can download the code on Github.

We will now run the code every day with Task scheduler.

If you don’t know how to do this, look at this guide: how to automate your SEO tasks with Task Scheduler.

Make sure that you autostart Xampp Apache and MySQL modules. You can look at this solution.

Automatically Start Xampp Modules

Then, launch the script every day.

The alt attribute of this image is empty; its file name is create-action-in-task-manager.jpg.

Full Code

import pandas as pd
import datetime
import httplib2
from apiclient.discovery import build
from collections import defaultdict
from dateutil import relativedelta
import argparse
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import pymysql.cursors
import sqlalchemy

"""
Step 1: Create your db (the first time only)
connection = pymysql.connect(host='localhost',
                             user='root',
                             port='',
                             password='')

# Simulate the CREATE DATABASE function of mySQL
try:
    with connection.cursor() as cursor:
        cursor.execute('CREATE DATABASE gsc_db')

finally:
    connection.close()



Step 2: Create your table and/or connect to it
"""
connection = pymysql.connect(host='localhost',
                             user='root',
                             port='',
                             password='',
                             db='gsc_db',
                             cursorclass=pymysql.cursors.DictCursor)

try:
    with connection.cursor() as cursor:
        sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE, 
                                                            Page LONGTEXT, 
                                                            Query LONGTEXT, 
                                                            Clicks INT, 
                                                            Impressions INT, 
                                                            Ctr DECIMAL(10,2), 
                                                            Position DECIMAL(3,2))'''
        cursor.execute(sqlQuery)
finally:
    connection.close()


"""
Step 3: Connect to the API
"""
site = 'https://www.yoursite.com'

SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')

CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json' # Path to client_secrets.json file.

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[tools.argparser])
flags = parser.parse_args([])


flow = client.flow_from_clientsecrets(
    CLIENT_SECRETS_PATH, scope=SCOPES,
    message=tools.message_if_missing(CLIENT_SECRETS_PATH))

storage = file.Storage('searchconsole.dat')
credentials = storage.get()

if credentials is None or credentials.invalid:
  credentials = tools.run_flow(flow, storage, flags)
http = credentials.authorize(http=httplib2.Http())

webmasters_service = build('webmasters', 'v3', http=http)

"""
Step 4: Query All Your Search Traffic
"""
#Set Date
end_date = datetime.date.today()-relativedelta.relativedelta(days=3)
start_date = end_date 

# Execute one API request against the Search Console service.
def execute_request(service, property_uri, request):
    """Run a searchanalytics query for `property_uri` and return the raw response dict."""
    analytics = service.searchanalytics()
    query = analytics.query(siteUrl=property_uri, body=request)
    return query.execute()

# Page through the results 25,000 rows at a time (the API maximum per
# request), stopping when a short page comes back or after 40 pages (~1M rows).
maxRows = 25000
i = 0
numRows = maxRows
scDict = defaultdict(list)

while numRows == 25000 and i < 40:  # Limit to 1M rows
    request = {
        'startDate': start_date.strftime('%Y-%m-%d'),
        'endDate': end_date.strftime('%Y-%m-%d'),
        'dimensions': ['date', 'page', 'query'],
        'rowLimit': maxRows,
        'startRow': i * maxRows
    }
    response = execute_request(webmasters_service, site, request)

    # The API omits the 'rows' key entirely when there is no data for the
    # requested window; default to an empty list instead of raising KeyError.
    rows = response.get('rows', [])
    for row in rows:
        scDict['date'].append(row['keys'][0] or 0)
        scDict['page'].append(row['keys'][1] or 0)
        scDict['query'].append(row['keys'][2] or 0)
        scDict['clicks'].append(row['clicks'] or 0)
        scDict['ctr'].append(row['ctr'] or 0)
        scDict['impressions'].append(row['impressions'] or 0)
        scDict['position'].append(row['position'] or 0)

    numRows = len(rows)
    i = i + 1

# Build the dataframe once, after all pages are collected (the original
# rebuilt and re-typed it on every iteration).
df = pd.DataFrame(data=scDict)
if not df.empty:
    df['clicks'] = df['clicks'].astype('int')
    df['ctr'] = df['ctr'] * 100  # store CTR as a percentage
    df['impressions'] = df['impressions'].astype('int')
    df['position'] = df['position'].round(2)
    df.sort_values('clicks', inplace=True, ascending=False)


"""
Step 5: Save your data to the MySQL database
"""

engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')
df.columns
df.to_sql(
        name = 'backup_gsc',
        con = engine,
        index = False,
        if_exists = 'append')

Automate Your Python Script

To automate the Google Search Console API queries, schedule your python script on Windows task scheduler, or automate python script using CRON on Mac.

This is it, you now know how to backup your search console data automatically in MySQL.