This post is part of the Guide on Python for SEO and the Guide on the Google Search Console API.

This guide will show you the easiest way to automatically back up your Google Search Console data to MySQL using Python.

Why Use a MySQL Database for SEO?

As an SEO, if you work on larger websites, you will often run into a storage problem: you can’t load a CSV or Excel file because you have too much data.

A SQL database solves that problem. With MySQL, you have a place to store your Google Search Console backups.


    Also, Google Data Studio has a connector for MySQL (limited to 100k rows, unfortunately). This way you can start automating your SEO reports.

    Get Started

    Before you can save your Google Search Console data to a MySQL database, you need a few things.
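
    The linked guides cover the full setup, but as a rough checklist you will need: a running MySQL server (this post uses XAMPP), Google Search Console API credentials (a client_secrets.json file), and the Python packages used throughout the guide. The snippet below simply lists those packages, with their usual pip install names in comments (inferred from the imports used later in this post):

    # Packages used in this guide, with the usual pip install names in the comments
    import pandas                             # pip install pandas
    import pymysql                            # pip install pymysql
    import sqlalchemy                         # pip install sqlalchemy
    import httplib2                           # pip install httplib2
    from oauth2client import client           # pip install oauth2client
    from apiclient.discovery import build     # pip install google-api-python-client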

    Create Your MySQL Database

    If you have not created your database already, you need to create one to mirror the Google Search Console data.

    import pymysql.cursors
    
    # Create a MySQL database
    # Establish the connection (XAMPP defaults: root user, no password, port 3306)
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 port=3306,
                                 password='')
    
    # Create the gsc_db database (this only needs to be done once)
    try:
        with connection.cursor() as cursor:
            cursor.execute('CREATE DATABASE gsc_db')
    
    finally:
        connection.close()
        
        
    # Create a table (video walkthrough: https://www.youtube.com/watch?v=YWVx4t8FAwU)
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 port=3306,
                                 password='',
                                 db='gsc_db',
                                 cursorclass=pymysql.cursors.DictCursor)
    
    try:
        with connection.cursor() as cursor:
            sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE, 
                                                                Page LONGTEXT, 
                                                                Query LONGTEXT, 
                                                                Clicks INT, 
                                                                Impressions INT, 
                                                                Ctr DECIMAL(10,2), 
                                                                Position DECIMAL(5,2))''' 
            cursor.execute(sqlQuery)
    finally:
        connection.close()
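
    If you want to confirm from Python that the database and table were created, a quick check like the one below works (a minimal sketch, assuming the same defaults as above: root user with no password on localhost):

    import pymysql.cursors

    # Sanity check: list the tables in gsc_db and describe the backup_gsc table
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 port=3306,
                                 password='',
                                 db='gsc_db',
                                 cursorclass=pymysql.cursors.DictCursor)
    try:
        with connection.cursor() as cursor:
            cursor.execute('SHOW TABLES')
            print(cursor.fetchall())    # should include backup_gsc
            cursor.execute('DESCRIBE backup_gsc')
            print(cursor.fetchall())    # Date, Page, Query, Clicks, Impressions, Ctr, Position
    finally:
        connection.close()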
    

    Create Your Google Search Console API Call Script

    If you don’t know how to do this, look at this guide: how to use Google Search Console API with Python. The data will be stored in a Pandas DataFrame.

    """
    Step 1: Get your JSON API Key
    """
    # https://www.jcchouinard.com/how-to-get-google-search-console-api-keys/
    # Make sure that you name your JSON API key file client_secrets.json
    
    """
    Step 2: Import Libraries
    """
    
    import pandas as pd
    import datetime
    import httplib2
    from apiclient.discovery import build
    from collections import defaultdict
    from dateutil import relativedelta
    import argparse
    from oauth2client import client
    from oauth2client import file
    from oauth2client import tools
    
    """
    Step 3: Connect to the API
    """
    site = 'https://www.yoursite.com'
    
    SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
    DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')
    
    CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json' # Path to client_secrets.json file.
    
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = parser.parse_args([])
    
    
    flow = client.flow_from_clientsecrets(
        CLIENT_SECRETS_PATH, scope=SCOPES,
        message=tools.message_if_missing(CLIENT_SECRETS_PATH))
    
    storage = file.Storage('searchconsole.dat')
    credentials = storage.get()
    
    if credentials is None or credentials.invalid:
      credentials = tools.run_flow(flow, storage, flags)
    http = credentials.authorize(http=httplib2.Http())
    
    webmasters_service = build('webmasters', 'v3', http=http)
    
    """
    Step 4: Query All Your Search Traffic
    """
    # Set the date range: a single day, 3 days back (Search Console data lags by a few days)
    end_date = datetime.date.today() - relativedelta.relativedelta(days=3)
    start_date = end_date
    
    #Execute your API Request
    def execute_request(service, property_uri, request):
        return service.searchanalytics().query(siteUrl=property_uri, body=request).execute()
    
    # Get more than 25,000 rows by paginating the API requests
    maxRows = 25000
    i = 0
    numRows = maxRows
    scDict = defaultdict(list)
    
    while numRows == 25000 and i < 40:  # limit to 1M rows (40 x 25,000)
        request = {
            'startDate': datetime.datetime.strftime(start_date,"%Y-%m-%d"),
            'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),
            'dimensions': ['date','page','query'],
            'rowLimit': maxRows, 
            'startRow': i*maxRows
        }
        response = execute_request(webmasters_service, site, request)
        # Process the response (stop early if the API returned no rows)
        rows = response.get('rows', [])
        if not rows:
            break
        for row in rows:
            scDict['date'].append(row['keys'][0] or 0)    
            scDict['page'].append(row['keys'][1] or 0)
            scDict['query'].append(row['keys'][2] or 0)
            scDict['clicks'].append(row['clicks'] or 0)
            scDict['ctr'].append(row['ctr'] or 0)
            scDict['impressions'].append(row['impressions'] or 0)
            scDict['position'].append(row['position'] or 0)
    
        # Add the response to a DataFrame
        df = pd.DataFrame(data = scDict)
        df['clicks'] = df['clicks'].astype('int')
        df['ctr'] = df['ctr']*100
        df['impressions'] = df['impressions'].astype('int')
        df['position'] = df['position'].round(2)
        df.sort_values('clicks',inplace=True,ascending=False)
        numRows = len(rows)
        i = i + 1
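
    At this point, df holds one row per date / page / query combination for the selected day. A quick preview is an easy way to check the pull before writing anything to MySQL (optional, the rest of the script does not depend on it):

    # Preview the DataFrame built from the API responses
    print(df.shape)    # (number of rows, 7 columns)
    print(df.head())   # columns: date, page, query, clicks, ctr, impressions, position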
    

    Save Your API Call to the MySQL Database

    Now that we have made the API call, we are going to add the result to the MySQL database.

    We will need the sqlalchemy package.

    import sqlalchemy
    
    #Create Engine
    engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')
    
    # Append the new data to the table
    df.to_sql(
            name = 'backup_gsc',
            con = engine,
            index = False,
            if_exists = 'append')
    

    You can now see in phpMyAdmin that your data was properly saved to your MySQL database.
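
    If you prefer to check from Python rather than phpMyAdmin, you can read the table back with pandas (a minimal sketch, assuming the same connection details as above):

    import pandas as pd
    import sqlalchemy

    # Read a few rows back from MySQL to confirm the append worked
    engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')
    check = pd.read_sql('SELECT * FROM backup_gsc ORDER BY Date DESC LIMIT 10', con=engine)
    print(check)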

    Run the Task Every Day With Windows Task Scheduler

    Now we want to automate the process by making the API call every day.

    To do this, we need to ask Windows Task Scheduler to run the Python code.

    Right now, you should have a Python file that looks like the code below. I will call it gscbackups.py.

    You can download the code on GitHub.

    We will now run the code every day with Task Scheduler.

    If you don’t know how to do this, look at this guide: how to automate your SEO tasks with Task Scheduler.

    Make sure that you autostart the XAMPP Apache and MySQL modules. You can look at this solution:

    Automatically Start XAMPP Modules

    Then, launch the script every day.

    (Screenshot: creating the action in Task Scheduler, create-action-in-task-manager.jpg)
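
    Because the script runs unattended, it can also help to have it write one line to a log file at the end of each run, so you can quickly confirm that the scheduled task actually executed. This is an optional addition, not part of the original script, and the log path is only an example:

    import datetime

    # Optional: append one line per run to a log file (example path, adjust to your setup)
    with open(r'C:\Users\YOUR-PATH\gsc_backup.log', 'a') as log_file:
        log_file.write('GSC backup completed at {}\n'.format(datetime.datetime.now()))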

    Full Code

    import pandas as pd
    import datetime
    import httplib2
    from apiclient.discovery import build
    from collections import defaultdict
    from dateutil import relativedelta
    import argparse
    from oauth2client import client
    from oauth2client import file
    from oauth2client import tools
    import pymysql.cursors
    import sqlalchemy
    
    """
    Step 1: Create your db (the first time only)
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 port=3306,
                                 password='')
    
    # Create the gsc_db database (this only needs to be done once)
    try:
        with connection.cursor() as cursor:
            cursor.execute('CREATE DATABASE gsc_db')
    
    finally:
        connection.close()
    
    
    
    Step 2: Create your table and/or connect to it
    """
    connection = pymysql.connect(host='localhost',
                                 user='root',
                                 port=3306,
                                 password='',
                                 db='gsc_db',
                                 cursorclass=pymysql.cursors.DictCursor)
    
    try:
        with connection.cursor() as cursor:
            sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE, 
                                                                Page LONGTEXT, 
                                                                Query LONGTEXT, 
                                                                Clicks INT, 
                                                                Impressions INT, 
                                                                Ctr DECIMAL(10,2), 
                                                                Position DECIMAL(5,2))'''
            cursor.execute(sqlQuery)
    finally:
        connection.close()
    
    
    """
    Step 3: Connect to the API
    """
    site = 'https://www.yoursite.com'
    
    SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
    DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')
    
    CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json' # Path to client_secrets.json file.
    
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        parents=[tools.argparser])
    flags = parser.parse_args([])
    
    
    flow = client.flow_from_clientsecrets(
        CLIENT_SECRETS_PATH, scope=SCOPES,
        message=tools.message_if_missing(CLIENT_SECRETS_PATH))
    
    storage = file.Storage('searchconsole.dat')
    credentials = storage.get()
    
    if credentials is None or credentials.invalid:
      credentials = tools.run_flow(flow, storage, flags)
    http = credentials.authorize(http=httplib2.Http())
    
    webmasters_service = build('webmasters', 'v3', http=http)
    
    """
    Step 4: Query All Your Search Traffic
    """
    # Set the date range: a single day, 3 days back (Search Console data lags by a few days)
    end_date = datetime.date.today() - relativedelta.relativedelta(days=3)
    start_date = end_date
    
    #Execute your API Request
    def execute_request(service, property_uri, request):
        return service.searchanalytics().query(siteUrl=property_uri, body=request).execute()
    
    # Get more than 25,000 rows by paginating the API requests
    maxRows = 25000
    i = 0
    numRows = maxRows
    scDict = defaultdict(list)
    
    while numRows == 25000 and i < 40:  # limit to 1M rows (40 x 25,000)
        request = {
            'startDate': datetime.datetime.strftime(start_date,"%Y-%m-%d"),
            'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),
            'dimensions': ['date','page','query'],
            'rowLimit': maxRows, 
            'startRow': i*maxRows
        }
        response = execute_request(webmasters_service, site, request)
        # Process the response (stop early if the API returned no rows)
        rows = response.get('rows', [])
        if not rows:
            break
        for row in rows:
            scDict['date'].append(row['keys'][0] or 0)    
            scDict['page'].append(row['keys'][1] or 0)
            scDict['query'].append(row['keys'][2] or 0)
            scDict['clicks'].append(row['clicks'] or 0)
            scDict['ctr'].append(row['ctr'] or 0)
            scDict['impressions'].append(row['impressions'] or 0)
            scDict['position'].append(row['position'] or 0)
    
        # Add the response to a DataFrame
        df = pd.DataFrame(data = scDict)
        df['clicks'] = df['clicks'].astype('int')
        df['ctr'] = df['ctr']*100
        df['impressions'] = df['impressions'].astype('int')
        df['position'] = df['position'].round(2)
        df.sort_values('clicks',inplace=True,ascending=False)
        numRows = len(rows)
        i = i + 1
    
    
    """
    Step 5: Save your data to the MySQL database
    """
    
    engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')
    df.to_sql(
            name = 'backup_gsc',
            con = engine,
            index = False,
            if_exists = 'append')
    

    Automate Your Python Script

    To automate the Google Search Console API queries, schedule your Python script with Windows Task Scheduler, or automate the Python script using cron on Mac.

    That’s it: you now know how to back up your Search Console data automatically in MySQL.
