From a1377d7fd77c137f030557a4c1726c83ff10b2fd Mon Sep 17 00:00:00 2001 From: Bryant Howell Date: Wed, 4 Dec 2019 09:03:41 -0600 Subject: [PATCH] Big changes to structure of tableau_documents, trying to make the file classes make a lot more sense --- tableau_documents/tableau_datasource.py | 12 +- tableau_documents/tableau_file.py | 362 ++++++++++++++++++++++++ tableau_documents/tableau_workbook.py | 25 +- 3 files changed, 392 insertions(+), 7 deletions(-) diff --git a/tableau_documents/tableau_datasource.py b/tableau_documents/tableau_datasource.py index 7acf8ca..757d067 100644 --- a/tableau_documents/tableau_datasource.py +++ b/tableau_documents/tableau_datasource.py @@ -830,8 +830,10 @@ def add_table_column(self, table_alias: str, table_field_name: str, tableau_fiel self.column_mapping[tableau_field_alias] = "[{}].[{}]".format(table_alias, table_field_name) - def add_column_alias(self, tableau_field_alias, caption=None, dimension_or_measure=None, - discrete_or_continuous=None, datatype=None, calculation=None): + def add_column_alias(self, tableau_field_alias: str, caption: Optional[str] = None, + dimension_or_measure: Optional[str] = None, + discrete_or_continuous: Optional[str] = None, datatype: Optional[str] = None, + calculation: Optional[str] = None): if dimension_or_measure.lower() in ['dimension', 'measure']: role = dimension_or_measure.lower() else: @@ -917,7 +919,7 @@ def add_relative_date_extract_filter(self, column_name, period_type, number_of_p to_date) self.extract_filters.append(ds_filter) - def create_continuous_filter(self, column_name, min_value=None, max_value=None, date=False): + def create_continuous_filter(self, column_name: str, min_value=None, max_value=None, date=False) -> Dict: # Dates need to be wrapped in # # if date is True: if min_value is not None: @@ -943,7 +945,7 @@ def create_continuous_filter(self, column_name, min_value=None, max_value=None, } return ds_filter - def create_relative_date_filter(self, column_name, period_type, number_of_periods, + def create_relative_date_filter(self, column_name: str, period_type, number_of_periods, previous_next_current='previous', to_date=False): if period_type.lower() not in ['quarter', 'year', 'month', 'day', 'hour', 'minute']: raise InvalidOptionException('period_type must be one of : quarter, year, month, day, hour, minute') @@ -1058,7 +1060,7 @@ def generate_filters(self, filter_list: List[Dict]) -> List[ET.Element]: return_array.append(f) return return_array - def generate_datasource_filters_section(self): + def generate_datasource_filters_section(self) -> List[ET.Element]: filters = self.generate_filters(self.datasource_filters) filters_array = [] for f in filters: diff --git a/tableau_documents/tableau_file.py b/tableau_documents/tableau_file.py index 48b62cf..9b4b2c8 100644 --- a/tableau_documents/tableau_file.py +++ b/tableau_documents/tableau_file.py @@ -6,6 +6,7 @@ import codecs from typing import Union, Any, Optional, List, Dict, Tuple import xml.etree.ElementTree as ET +import io from tableau_tools.logging_methods import LoggingMethods from tableau_tools.logger import Logger @@ -254,3 +255,364 @@ def save_new_file(self, new_filename_no_extension: str, data_file_replacement_ma self.tableau_document.save_file(save_filename) return save_filename + + +# Hyper files are not considered in this situation as they are binary and generated a different way + +# This is a helper class with factory and static methods +class TableauFileManager(LoggingMethods): + def __init__(self, logger_obj: Optional[Logger] = None): + + @staticmethod + def open(self, filename: str, logger_obj: Optional[Logger] = None): + self.log('Opening {}'.format(filename)) + # Packaged (X) files must come first because they are supersets + if filename.lower().find('.tdsx') != -1: + self._original_file_type = 'tdsx' + return TDSX(filename=filename, logger_obj=logger_obj) + elif filename.lower().find('.twbx') != -1: + self._original_file_type = 'twbx' + #self._final_file_type = 'twbx' + return TWBX(filename=filename, logger_obj=logger_obj) + elif filename.lower().find('.tflx') != -1: + self._original_file_type = 'tflx' + return TFLX(filename=filename, logger_obj=logger_obj) + elif filename.lower().find('.twb') != -1: + self._original_file_type = 'twb' + return TWB(filename=filename, logger_obj=logger_obj) + elif filename.lower().find('.tds') != -1: + self._original_file_type = 'tds' + return TDS(filename=filename, logger_obj=logger_obj) + elif filename.lower().find('tfl') != -1: + self._original_file_type = 'tfl' + return TFL(filename=filename, logger_obj=logger_obj) + else: + raise InvalidOptionException('Must open a Tableau file with ending of tds, tdsx, twb, twbx, tfl, tflx') + + + # Appropriate extension added if needed + def save_new_file(self, new_filename_no_extension: str, data_file_replacement_map: Optional[Dict], + new_data_files_map: Optional[Dict]) -> str: + self.start_log_block() + new_filename = new_filename_no_extension.split('.')[0] # simple algorithm to kill extension + if new_filename is None: + new_filename = new_filename_no_extension + self.log('Saving to a file with new filename {}'.format(new_filename)) + # Change filetype if there are new extracts to add + for ds in self.datasources: + if ds.tde_filename is not None or new_data_files_map is not None: + if self.file_type == 'twb': + self._final_file_type = 'twbx' + self.packaged_filename = "{}.twb".format(new_filename) + self.log('Final filetype will be TWBX') + break + if self.file_type == 'tds' or new_data_files_map is not None: + self._final_file_type = 'tdsx' + self.packaged_filename = "{}.tds".format(new_filename) + self.log('Final filetype will be TDSX') + break + + if self._final_file_type in ['twbx', 'tdsx']: + initial_save_filename = "{}.{}".format(new_filename, self._final_file_type) + # Make sure you don't overwrite the existing original file + files = list(filter(os.path.isfile, os.listdir(os.curdir))) # files only + save_filename = initial_save_filename + file_versions = 1 + while save_filename in files: + name_parts = initial_save_filename.split(".") + save_filename = "{} ({}).{}".format(name_parts[0],file_versions, name_parts[1]) + file_versions += 1 + new_zf = zipfile.ZipFile(save_filename, 'w', zipfile.ZIP_DEFLATED) + # Save the object down + self.log('Creating temporary XML file {}'.format(self.packaged_filename)) + # Have to extract the original TWB to temporary file + self.log('Creating from original file {}'.format(self.orig_filename)) + if self._original_file_type == 'twbx': + file_obj = open(self.orig_filename, 'rb') + o_zf = zipfile.ZipFile(file_obj) + o_zf.extract(self.tableau_document.twb_filename) + shutil.copy(self.tableau_document.twb_filename, 'temp.twb') + os.remove(self.tableau_document.twb_filename) + self.tableau_document.twb_filename = 'temp.twb' + file_obj.close() + + # Call to the tableau_document object to write the Tableau XML + self.tableau_document.save_file(self.packaged_filename) + new_zf.write(self.packaged_filename) + self.log('Removing file {}'.format(self.packaged_filename)) + os.remove(self.packaged_filename) + + if self._original_file_type == 'twbx': + os.remove('temp.twb') + self.log('Removed file temp.twb'.format(self.packaged_filename)) + + temp_directories_to_remove = {} + + if len(self.other_files) > 0: + file_obj = open(self.orig_filename, 'rb') + o_zf = zipfile.ZipFile(file_obj) + + # Find datasources with new extracts, and skip their files + extracts_to_skip = [] + for ds in self.tableau_document.datasources: + if ds.existing_tde_filename is not None and ds.tde_filename is not None: + extracts_to_skip.append(ds.existing_tde_filename) + + for filename in self.other_files: + self.log('Looking into additional files: {}'.format(filename)) + + # Skip extracts listed for replacement + if filename in extracts_to_skip: + self.log('File {} is from an extract that has been replaced, skipping'.format(filename)) + continue + + # If file is listed in the data_file_replacement_map, write data from the mapped in file + if data_file_replacement_map and filename in data_file_replacement_map: + new_zf.write(data_file_replacement_map[filename], "/" + filename) + # Delete from the data_file_replacement_map to reduce down to end + del data_file_replacement_map[filename] + else: + o_zf.extract(filename) + new_zf.write(filename) + os.remove(filename) + self.log('Removed file {}'.format(filename)) + lowest_level = filename.split('/') + temp_directories_to_remove[lowest_level[0]] = True + file_obj.close() + + # Loop through remaining files in data_file_replacement_map to just add + for filename in new_data_files_map: + new_zf.write(new_data_files_map[filename], "/" + filename) + + # If new extract, write that file + for ds in self.tableau_document.datasources: + if ds.tde_filename is not None: + new_zf.write(ds.tde_filename, '/Data/Datasources/{}'.format(ds.tde_filename)) + os.remove(ds.tde_filename) + self.log('Removed file {}'.format(ds.tde_filename)) + + # Cleanup all the temporary directories + for directory in temp_directories_to_remove: + self.log('Removing directory {}'.format(directory)) + try: + shutil.rmtree(directory) + except OSError as e: + # Just means that directory didn't exist for some reason, probably a swap occurred + pass + new_zf.close() + + return save_filename + else: + initial_save_filename = "{}.{}".format(new_filename_no_extension, self.file_type) + # Make sure you don't overwrite the existing original file + files = list(filter(os.path.isfile, os.listdir(os.curdir))) # files only + save_filename = initial_save_filename + file_versions = 1 + while save_filename in files: + name_parts = initial_save_filename.split(".") + save_filename = "{} ({}).{}".format(name_parts[0],file_versions, name_parts[1]) + file_versions += 1 + + self.tableau_document.save_file(save_filename) + return save_filename + + + # For saving a TWB or TDS (or other) from a document object. Actually should be + @staticmethod + def create_new_tds(tableau_datasource: TableauDatasource): + pass + + @staticmethod + def create_new_tdsx(tableau_datasource: TableauDatasource): + pass + + @staticmethod + def create_new_twb(tableau_workbook: TableauWorkbook): + pass + + @staticmethod + def create_new_twbx(tableau_workbook: TableauWorkbook): + pass + + +class TableauXmlFile(LoggingMethods): + def __init__(self, filename: str, logger_obj: Optional[Logger] = None): + self.logger: Optional[Logger] = logger_obj + self._tableau_document = None + self.packaged_file: bool = False + self.file_type: str + + # Appropriate extension added if needed + def save_new_file(self, new_filename_no_extension: str) -> str: + self.start_log_block() + new_filename = new_filename_no_extension.split('.')[0] # simple algorithm to kill extension + if new_filename is None: + new_filename = new_filename_no_extension + self.log('Saving to a file with new filename {}'.format(new_filename)) + + initial_save_filename = "{}.{}".format(new_filename_no_extension, self.file_type) + # Make sure you don't overwrite the existing original file + files = list(filter(os.path.isfile, os.listdir(os.curdir))) # files only + save_filename = initial_save_filename + file_versions = 1 + while save_filename in files: + name_parts = initial_save_filename.split(".") + save_filename = "{} ({}).{}".format(name_parts[0], file_versions, name_parts[1]) + file_versions += 1 + + self.tableau_document.save_file(save_filename) + return save_filename + + +class TWB(TableauXmlFile): + def __init__(self, filename: str, logger_obj: Optional[Logger] = None): + TableauXmlFile.__init__(self, filename=filename, logger_obj=logger_obj) + + self.file_type: str = '.twb' + try: + file_obj = open(filename, 'rb') + wb_fh = codecs.open(filename, 'r', encoding='utf-8') + ds_fh = codecs.open('temp_ds.txt', 'w', encoding='utf-8') + + # Stream through the file, only pulling the datasources section + ds_flag = None + # Skip all the metadata + metadata_flag = None + for line in wb_fh: + # Grab the datasources + + if line.find("\n") + if line.find("") != -1 and ds_flag is True: + ds_fh.close() + break + wb_fh.close() + + utf8_parser = ET.XMLParser(encoding='utf-8') + ds_xml = ET.parse('temp_ds.txt', parser=utf8_parser) + os.remove('temp_ds.txt') + + self._tableau_document = TableauWorkbook(filename, self.logger) + file_obj.close() + except IOError: + self.log("Cannot open file {}".format(filename)) + raise + +class TDS(TableauXmlFile): + def __init__(self, filename: str, logger_obj: Optional[Logger] = None): + TableauXmlFile.__init__(self, filename=filename, logger_obj=logger_obj) + + self.file_type: str = '.tds' + try: + # Here we throw out metadata-records even when opening a workbook from disk, they take up space + # and are recreate automatically. Very similar to what we do in initialization of TableauWorkbook + o_ds_fh = codecs.open(filename, 'r', encoding='utf-8') + WriterClass = codecs.getwriter('utf-8') + #ds_fh = codecs.open('temp_file.txt', 'w', encoding='utf-8') + ds_stream = io.StringIO() + ds_fh = WriterClass(ds_stream) + #self.temp_filename = 'temp_file.txt' + metadata_flag = None + for line in o_ds_fh: + # Grab the datasources + if line.find(" Union[TableauDatasource, TableauWorkbook]: + return self._tableau_document + + @property + def file_type(self) -> str: + return self._original_file_type + + @property + def datasources(self) -> List[TableauDatasource]: + if self._tableau_document.document_type == 'workbook': + return self._tableau_document.datasources + elif self._tableau_document.document_type == 'datasource': + return List[self._tableau_document, ] + else: + return [] + + + +class TDSX(TableauPackagedFile): + pass + + + +class TWBX(TableauPackagedFile): + pass + +class TFL(TableauXmlFile): + pass + +class TFLX(TableauPackagedFile): + pass \ No newline at end of file diff --git a/tableau_documents/tableau_workbook.py b/tableau_documents/tableau_workbook.py index 5e9c8d3..64afcfa 100644 --- a/tableau_documents/tableau_workbook.py +++ b/tableau_documents/tableau_workbook.py @@ -12,7 +12,8 @@ from .tableau_parameters import TableauParameters # from tableau_documents.tableau_document import TableauDocument - +# Historically, this was just a file wrapper. That functionality has moved to the TWB class +# This is now a stub for any eventual XML modification within the workbook class TableauWorkbook(LoggingMethods): def __init__(self, twb_filename: str, logger_obj: Optional[Logger] = None): #TableauDocument.__init__(self) @@ -76,6 +77,7 @@ def add_parameters_to_workbook(self) -> TableauParameters: self.parameters = TableauParameters(logger_obj=self.logger) return self.parameters + # Opens the original file, but substitutes in the new data sources def save_file(self, filename_no_extension: str, save_to_directory: Optional[str] = None): self.start_log_block() try: @@ -124,4 +126,23 @@ def save_file(self, filename_no_extension: str, save_to_directory: Optional[str] except IOError: self.log("Error: File '{} cannot be opened to write to".format(filename_no_extension)) self.end_log_block() - raise \ No newline at end of file + raise + + def get_datasource_xml_text(self) -> str: + self.start_log_block() + xml_text = "" + final_datasources = [] + if self.parameters is not None: + final_datasources.append(self.parameters) + final_datasources.extend(self.datasources) + else: + final_datasources = self.datasources + for ds in final_datasources: + ds_string = ds.get_datasource_xml() + if isinstance(ds_string, bytes): + final_string = ds_string.decode('utf-8') + else: + final_string = ds_string + xml_text += final_string + self.end_log_block() + return xml_text