I am a big fan of TableauScraper, which is super helpful when dealing with government agencies (namely the NYS Department of Health, I’m speaking to you) that want to appear transparent, but not really, and make it difficult to download their data.

I am a big fan of TableauScraper, which is super helpful when dealing with government agencies (namely the NYS Department of Health, I’m speaking to you) that want to appear transparent, but not really, and make it difficult to download their data. But with TableauScraper it’s pretty easy to get their data. I use TableauScraper’s prompt.py script to download the Tableau and prompt me for the table I want to select, then I added a few lines to dump the pandas DataFrame into a CSV file. I won’t say I rewrote the book on this one, but it works for my purposes.

"""Prompt for Tableau worksheets and dump the selected data to CSV.

The following script will get the session token, get the data,
prompt the user to select a worksheet, parse the data into a dataframe,
print it, and export every returned worksheet to a CSV file.

Usage:
    python prompt.py --get {dashboard,dropdown,select} --url <tableau url>
"""
import argparse
import json  # only needed by the optional debug dump in main()
import os
import shutil
from pathlib import Path

import pandas as pd
from tableauscraper import TableauScraper as TS

# Export target of the first worksheet; further worksheets get a numeric
# suffix (tableau_2.csv, ...) so they no longer overwrite each other.
CSV_PATH = Path("/tmp/tableau.csv")

# Bullet character used for the printed lists.
BULLET = "\u2022"


def parse_args():
    """Parse the command line: the prompt type (--get) and the Tableau --url."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-get",
        "--get",
        choices=["dashboard", "dropdown", "select"],
        help="type of action",
        required=True,
    )
    parser.add_argument("-url", "--url", help="full tableau url", required=True)
    return parser.parse_args()


def terminal_width():
    """Return the terminal width in columns.

    shutil.get_terminal_size() falls back to a default size instead of
    raising OSError when stdout is not attached to a terminal (e.g. when
    the output is piped to a file).
    """
    return shutil.get_terminal_size().columns


def rule():
    """Return a horizontal separator line spanning the terminal width."""
    return "|" + ("-" * (terminal_width() - 2)) + "|"


def csv_path_for(index):
    """Return the CSV path for the worksheet at position *index* (0-based).

    The first worksheet keeps CSV_PATH; later ones get a 1-based numeric
    suffix before the extension.
    """
    if index == 0:
        return CSV_PATH
    return CSV_PATH.with_name(f"{CSV_PATH.stem}_{index + 1}{CSV_PATH.suffix}")


def print_worksheet(worksheet, first):
    """Print a worksheet's name, its data and its selectable columns.

    *first* adds the opening separator line above the first worksheet only.
    """
    if first:
        print(rule())
    print("|" + worksheet.name.center(terminal_width() - 2) + "|")
    print(rule())
    print(worksheet.data)
    print("")
    # selectable values
    selectable_columns = worksheet.getSelectableColumns()
    print(f"selectable columns for this worksheet : {len(selectable_columns)}")
    for column_name in selectable_columns:
        print(f"{BULLET} {column_name}")
        # for value in worksheet.getValues(column_name):
        #     print(f"\t{BULLET} {value}")

    print("")
    print(rule())


def export_worksheet(worksheet, csv_path):
    """Write the worksheet's dataframe to *csv_path* and report the location."""
    worksheet.data.to_csv(csv_path)
    print(f"\nWorksheet Data Exported to {csv_path}...\n")


def print_dropdowns(dashboard):
    """Print every dropdown input of the dashboard with its available values."""
    dropdown_inputs = dashboard.getDropdownInputs()
    print(f"drop down lists for this dashboard : {len(dropdown_inputs)}")
    for input_name in dropdown_inputs:
        print(f"{BULLET} {input_name}")
        for input_value in dashboard.getDropdownValues(input_name):
            print(f"\t{BULLET} {input_value}")


def main():
    """Load the Tableau, run the requested prompt, then print and export."""
    args = parse_args()

    ts = TS()
    ts.loads(args.url)

    # checkout the json data
    # with open('data.json', 'w', encoding='utf-8') as f:
    #    json.dump(ts.data, f, ensure_ascii=False, indent=4)
    # with open('info.json', 'w', encoding='utf-8') as f:
    #    json.dump(ts.info, f, ensure_ascii=False, indent=4)

    # argparse restricts --get to exactly these three keys.
    prompts = {
        "dashboard": ts.promptDashboard,
        "dropdown": ts.promptDropdown,
        "select": ts.promptSelect,
    }
    dashboard = prompts[args.get]()

    with pd.option_context(
        "display.max_rows", None, "display.max_columns", 5, "display.width", 1000
    ):
        for idx, worksheet in enumerate(dashboard.worksheets):
            print_worksheet(worksheet, first=(idx == 0))
            export_worksheet(worksheet, csv_path_for(idx))

        # dropdown list
        print_dropdowns(dashboard)


if __name__ == "__main__":
    main()

Leave a Reply

Your email address will not be published. Required fields are marked *