This guide will show you the easiest way to automatically back up your Google Search Console data to MySQL using Python.
Why Use a MySQL Database for SEO?
As SEOs, if you work on larger websites, you will often run into a storage problem: you can't load a CSV or Excel file because it holds too much data.
A SQL database gives you a place to store that data. With MySQL, you have somewhere to keep your Google Search Console backups.
Also, Google Data Studio has a connector for MySQL (limited to 100k rows, unfortunately), so you can start automating your SEO reports.
Get Started
Before you can save your Google Search Console data to a MySQL database, you need a few things:
- Install Python with Anaconda
- Install MySQL using XAMPP
- Create Your MySQL Database
- Get Google Search Console API Keys
- Query your search analytics data with the Google Search Console API
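The script below also relies on a handful of Python packages. If they are not already in your environment, installing them with pip along these lines should be enough (exact package names can vary slightly depending on your setup):

pip install pandas pymysql sqlalchemy google-api-python-client oauth2client httplib2 python-dateutil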
Create Your MySQL Database
If you have not already created your database, you need to create one to mirror the Google Search Console data.
import pymysql.cursors

# Create a MySQL database
# Establish the connection (default port 3306)
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='')

# Run the CREATE DATABASE statement
try:
    with connection.cursor() as cursor:
        cursor.execute('CREATE DATABASE gsc_db')
finally:
    connection.close()

# Create a table (see https://www.youtube.com/watch?v=YWVx4t8FAwU)
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='gsc_db',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE,
                      Page LONGTEXT,
                      Query LONGTEXT,
                      Clicks INT,
                      Impressions INT,
                      Ctr DECIMAL(10,2),
                      Position DECIMAL(5,2))'''
        cursor.execute(sqlQuery)
finally:
    connection.close()
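If you want to confirm that the database and table were created before moving on, a quick optional check like the sketch below will list the tables in gsc_db. It only assumes the same local root connection used above.

import pymysql.cursors

# Optional check: list the tables inside gsc_db
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='gsc_db',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        cursor.execute('SHOW TABLES')
        print(cursor.fetchall())  # should include 'backup_gsc'
finally:
    connection.close()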
Create Your Google Search Console API Call Script
If you don’t know how to do this, look at this guide: how to use Google Search Console API with Python. The data will be stored in a Pandas DataFrame.
"""
Step 1: Get your JSON API Key
"""
#https://www.jcchouinard.com/how-to-get-google-search-console-api-keys/
# Make sur the you name you JSON api key client_secrets.json
"""
Step 2: Import Libraries
"""
import pandas as pd
import datetime
import httplib2
from apiclient.discovery import build
from collections import defaultdict
from dateutil import relativedelta
import argparse
from oauth2client import client
from oauth2client import file
from oauth2client import tools
"""
Step 3: Connect to the API
"""
site = 'https://www.yoursite.com'
SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')
CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json' # Path to client_secrets.json file.
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[tools.argparser])
flags = parser.parse_args([])
flow = client.flow_from_clientsecrets(
CLIENT_SECRETS_PATH, scope=SCOPES,
message=tools.message_if_missing(CLIENT_SECRETS_PATH))
storage = file.Storage('searchconsole.dat')
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = tools.run_flow(flow, storage, flags)
http = credentials.authorize(http=httplib2.Http())
webmasters_service = build('webmasters', 'v3', http=http)
"""
Step 4: Query All Your Search Traffic
"""
#Set Date
end_date = datetime.date.today()-relativedelta.relativedelta(days=3)
start_date = end_date
#Execute your API Request
def execute_request(service, property_uri, request):
return service.searchanalytics().query(siteUrl=property_uri, body=request).execute()
#Get more than 25000 rows
maxRows = 25000
i = 0
numRows = maxRows
scDict = defaultdict(list)
while (numRows == 25000 and i < 40) : # Limit to 1M rows
request = {
'startDate': datetime.datetime.strftime(start_date,"%Y-%m-%d"),
'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),
'dimensions': ['date','page','query'],
'rowLimit': maxRows,
'startRow': i*maxRows
}
response = execute_request(webmasters_service, site, request)
#Process the response
for row in response['rows']:
scDict['date'].append(row['keys'][0] or 0)
scDict['page'].append(row['keys'][1] or 0)
scDict['query'].append(row['keys'][2] or 0)
scDict['clicks'].append(row['clicks'] or 0)
scDict['ctr'].append(row['ctr'] or 0)
scDict['impressions'].append(row['impressions'] or 0)
scDict['position'].append(row['position'] or 0)
#Add response to dataframe
df = pd.DataFrame(data = scDict)
df['clicks'] = df['clicks'].astype('int')
df['ctr'] = df['ctr']*100
df['impressions'] = df['impressions'].astype('int')
df['position'] = df['position'].round(2)
df.sort_values('clicks',inplace=True,ascending=False)
numRows=len(response['rows'])
i=i+1
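Before writing anything to MySQL, it can be worth taking a quick look at the DataFrame. This is just a sanity check on the df built above, nothing more:

# Quick sanity check on the DataFrame before saving it
print(df.shape)   # (number of rows, 7 columns)
print(df.head())  # date, page, query, clicks, ctr, impressions, position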
Save Your API Call to the MySQL Database
Now that we have made the API call, we are going to add the result to the MySQL database.
We will need the sqlalchemy package.
import sqlalchemy
# Create the engine
engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')

# Append the new data to the table
df.to_sql(
    name='backup_gsc',
    con=engine,
    index=False,
    if_exists='append')
You can now see in phpMyAdmin that your data was properly saved to your MySQL database.
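If you prefer to verify from Python rather than phpMyAdmin, a small read-back query against the same engine works too. This is only a sketch that reuses the engine and the pandas import from above:

# Read a few rows back from MySQL to confirm the append worked
check = pd.read_sql('SELECT * FROM backup_gsc ORDER BY Clicks DESC LIMIT 10', con=engine)
print(check)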
Run the Task Every Day With Windows Task Scheduler
Now we want to automate the process by making the API call every day.
To do this, we need to ask Windows Task Scheduler to run the Python code.
Right now, you should have a Python file that looks like the code below. I will call it gscbackups.py.
You can download the code on GitHub.
We will now run the code every day with Task Scheduler.
If you don’t know how to do this, look at this guide: how to automate your SEO tasks with Task Scheduler.
Make sure that you autostart the XAMPP Apache and MySQL modules. You can look at this solution.
Then, launch the script every day.
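Task Scheduler can call python.exe directly, but pointing it at a small batch file is often easier. Something like the sketch below would do; the paths are placeholders that you need to replace with your own Python and script locations:

@echo off
REM run_gsc_backup.bat - called daily by Windows Task Scheduler
"C:\Users\YOUR-PATH\Anaconda3\python.exe" "C:\Users\YOUR-PATH\gscbackups.py"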
Full Code
import pandas as pd
import datetime
import httplib2
from apiclient.discovery import build
from collections import defaultdict
from dateutil import relativedelta
import argparse
from oauth2client import client
from oauth2client import file
from oauth2client import tools
import pymysql.cursors
import sqlalchemy

"""
Step 1: Create your db (the first time only)

connection = pymysql.connect(host='localhost',
                             user='root',
                             password='')

# Run the CREATE DATABASE statement
try:
    with connection.cursor() as cursor:
        cursor.execute('CREATE DATABASE gsc_db')
finally:
    connection.close()

Step 2: Create your table and/or connect to it
"""
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='',
                             db='gsc_db',
                             cursorclass=pymysql.cursors.DictCursor)
try:
    with connection.cursor() as cursor:
        sqlQuery = '''CREATE TABLE IF NOT EXISTS backup_gsc(Date DATE,
                      Page LONGTEXT,
                      Query LONGTEXT,
                      Clicks INT,
                      Impressions INT,
                      Ctr DECIMAL(10,2),
                      Position DECIMAL(5,2))'''
        cursor.execute(sqlQuery)
finally:
    connection.close()

"""
Step 3: Connect to the API
"""
site = 'https://www.yoursite.com'
SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
DISCOVERY_URI = ('https://www.googleapis.com/discovery/v1/apis/customsearch/v1/rest')
CLIENT_SECRETS_PATH = r'C:\Users\YOUR-PATH\client_secrets.json'  # Path to client_secrets.json file.

parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[tools.argparser])
flags = parser.parse_args([])

flow = client.flow_from_clientsecrets(
    CLIENT_SECRETS_PATH, scope=SCOPES,
    message=tools.message_if_missing(CLIENT_SECRETS_PATH))

storage = file.Storage('searchconsole.dat')
credentials = storage.get()
if credentials is None or credentials.invalid:
    credentials = tools.run_flow(flow, storage, flags)

http = credentials.authorize(http=httplib2.Http())
webmasters_service = build('webmasters', 'v3', http=http)

"""
Step 4: Query All Your Search Traffic
"""
# Set the date: a single day, 3 days ago (fresher GSC data is incomplete)
end_date = datetime.date.today() - relativedelta.relativedelta(days=3)
start_date = end_date

# Execute your API request
def execute_request(service, property_uri, request):
    return service.searchanalytics().query(siteUrl=property_uri, body=request).execute()

# Get more than 25,000 rows by paginating with startRow
maxRows = 25000
i = 0
numRows = maxRows
scDict = defaultdict(list)

while (numRows == 25000 and i < 40):  # Limit to 1M rows
    request = {
        'startDate': datetime.datetime.strftime(start_date, '%Y-%m-%d'),
        'endDate': datetime.datetime.strftime(end_date, '%Y-%m-%d'),
        'dimensions': ['date', 'page', 'query'],
        'rowLimit': maxRows,
        'startRow': i * maxRows
    }
    response = execute_request(webmasters_service, site, request)
    # Process the response
    for row in response['rows']:
        scDict['date'].append(row['keys'][0] or 0)
        scDict['page'].append(row['keys'][1] or 0)
        scDict['query'].append(row['keys'][2] or 0)
        scDict['clicks'].append(row['clicks'] or 0)
        scDict['ctr'].append(row['ctr'] or 0)
        scDict['impressions'].append(row['impressions'] or 0)
        scDict['position'].append(row['position'] or 0)
    # Add the response to a DataFrame
    df = pd.DataFrame(data=scDict)
    df['clicks'] = df['clicks'].astype('int')
    df['ctr'] = df['ctr'] * 100
    df['impressions'] = df['impressions'].astype('int')
    df['position'] = df['position'].round(2)
    df.sort_values('clicks', inplace=True, ascending=False)
    numRows = len(response['rows'])
    i = i + 1

"""
Step 5: Save your data to the MySQL database
"""
engine = sqlalchemy.create_engine('mysql+pymysql://root:@localhost/gsc_db')
df.to_sql(
    name='backup_gsc',
    con=engine,
    index=False,
    if_exists='append')
Automate Your Python Script
To automate the Google Search Console API queries, schedule your Python script with Windows Task Scheduler, or automate the Python script using cron on Mac.
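On Mac (or Linux), a single crontab entry is enough. The line below is only an illustration with placeholder paths; it would run the backup every morning at 8:00 and append the output to a log file:

# crontab -e, then add:
0 8 * * * /usr/local/bin/python3 /Users/yourname/gscbackups.py >> /Users/yourname/gsc_backup.log 2>&1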
That's it: you now know how to automatically back up your Search Console data in MySQL.