###############################################################################
#
#   PYTHON SCRIPT TO RANDOMLY GENERATE DATA
#   STEPS :
#       1 --> Generate a line of a table.
#       2 --> Generate an entire table.
#       3 --> Resolve the issues of dependencies and others.
#       4 --> Convert the created tables to the PostgreSQL code.
#       Bonus --> Enjoy !
#
###############################################################################

import os
import random
import csv

###############################################################################
#
#   HERE IS ALL THE RAW DATA
#
#   These are plain csv files, each holding a single column.
#   The first row of a file is skipped by the reader below (header row).
#
###############################################################################

pathIntegers    : str = ".csv/row_numbers.csv"
pathSentences   : str = ".csv/sentences.csv"
pathUrls        : str = ".csv/urls.csv"
pathWords       : str = ".csv/words.csv"
pathNames       : str = ".csv/names.csv"
pathNameModels  : str = ".csv/name_models.csv"
pathDates       : str = ".csv/dates.csv"
pathColors      : str = ".csv/colors.csv"
pathBrands      : str = ".csv/brands.csv"

###############################################################################
#
#   TO KNOW WHICH NUMBER IS USED FOR A PRIMARY KEY
#
###############################################################################

def initialize_dict_primary_key(pathFile : str) -> dict:
    """
    Build a "used / not used" table of candidate primary keys.

    :param pathFile: path of a one-column csv file of integers; in this
                     script it merely is `pathIntegers`.
    :return: a dict mapping every integer found in the first column (header
             row excluded) to 0, i.e. "not used yet".
    :raises ValueError: if a non-empty row does not start with an integer.
    """
    # newline='' is what the csv module asks for when handing it a file.
    with open(pathFile, 'r', newline='') as csvFile:
        rows : list = list(csv.reader(csvFile))

    # rows[0] is the header. A blank line is parsed as [] by csv.reader and
    # is skipped instead of raising IndexError on row[0].
    return { int(row[0]): 0 for row in rows[1:] if row }

###############################################################################

# id_truc = { X (int) : 1 or 0 }      1 for used, 0 for not.
idMembreDict    : dict = initialize_dict_primary_key(pathIntegers)
idBoiteDict     : dict = initialize_dict_primary_key(pathIntegers)
idCouleurDict   : dict = initialize_dict_primary_key(pathIntegers)
idFilDict       : dict = initialize_dict_primary_key(pathIntegers)
idFormeDict     : dict = initialize_dict_primary_key(pathIntegers)
idIllustrDict   : dict = initialize_dict_primary_key(pathIntegers)
idMarqueDict    : dict = initialize_dict_primary_key(pathIntegers)
idMessageDict   : dict = initialize_dict_primary_key(pathIntegers)
idModeleDict    : dict = initialize_dict_primary_key(pathIntegers)
idMotifDict     : dict = initialize_dict_primary_key(pathIntegers)
idPieceDict     : dict = initialize_dict_primary_key(pathIntegers)
idPieceCompDict : dict = initialize_dict_primary_key(pathIntegers)
idTagDict       : dict = initialize_dict_primary_key(pathIntegers)

###############################################################################
#
#   STEP 1 : GENERATE A LINE OF A TABLE
#
###############################################################################

def give_used_key(idDict : dict) -> str:
    """
    Pick at random a key that is already used.

    :param idDict: one of the id*Dict tables.
    :return: a key X such that idDict[X] == 1, as a string, or '' when no
             key is used yet (the associated table has no instances).
    """
    keysUsed : list = [ key for key, state in idDict.items() if state == 1 ]

    # The associated table has no instances yet.
    if not keysUsed:
        return ''

    return str(random.choice(keysUsed))

###############################################################################

def give_not_used_key(idDict : dict) -> str:
    """
    Pick at random a key that is not used yet and mark it as used.

    :param idDict: one of the id*Dict tables. It is updated in place:
                   the returned key goes 0 --> 1.
    :return: a key X such that idDict[X] was 0, as a string, or '' when
             every key of the table is already used.
    """
    keysNotUsed : list = [ key for key, state in idDict.items() if state == 0 ]

    # Every candidate key has already been handed out.
    if not keysNotUsed:
        return ''

    randomKey : int = random.choice(keysNotUsed)
    idDict[randomKey] = 1
    return str(randomKey)

###############################################################################
#
#   USED FOR NOT PRIMARY KEY VARIABLES
#
###############################################################################

# pathFile --> parsed rows. random_element is called once per generated
# value, so each csv file is parsed a single time and then reused.
_csvRowsCache : dict = {}

def _read_csv_rows(pathFile : str) -> list:
    """ Return all the rows of `pathFile`, reading the file only once. """
    if pathFile not in _csvRowsCache:
        with open(pathFile, 'r', newline='') as file:
            _csvRowsCache[pathFile] = list(csv.reader(file))
    return _csvRowsCache[pathFile]

def random_element(pathFile : str) -> str:
    """
    :param pathFile: the relative path of the csv file to read.
    :return: the first cell of a random row of this file (header row
             excluded), or "" when the chosen row is a blank line.
    :raises ValueError: if the file has no row after the header.
    """
    rows : list = _read_csv_rows(pathFile)
    if len(rows) < 2:
        raise ValueError(f"{pathFile} has no data row to pick from")

    row : list = rows[random.randint(1, len(rows) - 1)]
    return row[0] if row else ""

###############################################################################

def _sql_string(value : str) -> str:
    """
    Return `value` as a SQL string constant: wrapped in single quotes, with
    every embedded single quote doubled so that it cannot end the constant.
    """
    return "'" + value.replace("'", "''") + "'"

###############################################################################

def construct_line(*args : str) -> str:
    """
    :param *args: the already formatted values of the line.
                  example : "1", "'toto'", "'Lorem PIPsum'", "42".
    :return: a line to give to the sql.
             example : "(1, 'toto', 'Lorem PIPsum', 42)".
    """
    return "(" + ', '.join(args) + ")"

###############################################################################
#
#   One generate_line_* function per table. Functions that own a primary key
#   draw it with give_not_used_key; the other values are received from the
#   caller or drawn from the csv files.
#
###############################################################################

def generate_line_acheter(idMembre : str, idPiece : str, quantite : str) -> str:
    return construct_line(idMembre, idPiece, quantite)

def generate_line_avoir_motif(idPiece : str, idMotif : str) -> str:
    return construct_line(idPiece, idMotif)

def generate_line_avoir_tag(idTag : str, idBoite : str) -> str:
    return construct_line(idTag, idBoite)

def generate_line_boites(idMarque : str) -> str:
    idBoite   : str = give_not_used_key(idBoiteDict)
    titre     : str = _sql_string(random_element(pathWords))
    dateBoite : str = _sql_string(random_element(pathDates))
    return construct_line(idBoite, titre, dateBoite, idMarque)

def generate_line_colorer(idPiece : str, idCouleur : str) -> str:
    return construct_line(idPiece, idCouleur)

def generate_line_construire(idBoite : str, idModele : str) -> str:
    return construct_line(idBoite, idModele)

def generate_line_contenir(idBoite : str, idPiece : str, quantite : str) -> str:
    return construct_line(idBoite, idPiece, quantite)

def generate_line_couleurs() -> str:
    idCouleur  : str = give_not_used_key(idCouleurDict)
    nomCouleur : str = _sql_string(random_element(pathColors))
    return construct_line(idCouleur, nomCouleur)

def generate_line_enregistrer(idBoite : str, idMembre : str,
                              quantite : str) -> str:
    return construct_line(idBoite, idMembre, quantite)

def generate_line_etre(idPiece : str, idMarque : str) -> str:
    return construct_line(idPiece, idMarque)

def generate_line_etre_complexe(idPiece : str, idPieceComp : str) -> str:
    return construct_line(idPiece, idPieceComp)

def generate_line_etre_forme(idForme : str, idPiece : str) -> str:
    return construct_line(idForme, idPiece)

def generate_line_fils(idModele : str) -> str:
    idFil : str = give_not_used_key(idFilDict)
    return construct_line(idFil, idModele)

def generate_line_illustrations(idModele : str) -> str:
    idIllustr  : str = give_not_used_key(idIllustrDict)
    urlIllustr : str = _sql_string(random_element(pathUrls))
    return construct_line(idIllustr, urlIllustr, idModele)

def generate_line_marques() -> str:
    idMarque  : str = give_not_used_key(idMarqueDict)
    nomMarque : str = _sql_string(random_element(pathBrands))
    return construct_line(idMarque, nomMarque)

def generate_line_membres() -> str:
    idMembre  : str = give_not_used_key(idMembreDict)
    nomMembre : str = _sql_string(random_element(pathNames))
    return construct_line(idMembre, nomMembre)

def generate_line_messages(idMembre : str, idFil : str,
                           idMessage2 : str) -> str:
    idMessage : str = give_not_used_key(idMessageDict)
    contenu   : str = _sql_string(random_element(pathSentences))

    # When the first message is declared, it references itself.
    if idMessage2 == '':
        idMessage2 = idMessage

    return construct_line(idMessage, contenu, idMembre, idFil, idMessage2)

def generate_line_modeles(idMembre : str, idModeleEte : str) -> str:
    idModele  : str = give_not_used_key(idModeleDict)
    nomModele : str = _sql_string(random_element(pathNameModels))
    urlNotice : str = _sql_string(random_element(pathUrls))

    # When the first modele is declared, it references itself.
    if idModeleEte == '':
        idModeleEte = idModele

    return construct_line(idModele, nomModele, urlNotice, idMembre,
                          idModeleEte)

def generate_line_necessiter(idModele : str, idPiece : str,
                             quantite : str) -> str:
    return construct_line(idModele, idPiece, quantite)

def generate_line_noter(idModele : str, idMembre : str) -> str:
    note : str = _sql_string(random_element(pathSentences))
    return construct_line(idModele, idMembre, note)

def generate_line_perdre(idMembre : str, idBoite : str, idPiece : str,
                         quantite : str) -> str:
    return construct_line(idMembre, idBoite, idPiece, quantite)

def generate_line_tags() -> str:
    idTag  : str = give_not_used_key(idTagDict)
    nomTag : str = _sql_string(random_element(pathWords))
    return construct_line(idTag, nomTag)

def generate_line_varier(idModele_1 : str, idModele_et : str) -> str:
    return construct_line(idModele_1, idModele_et)

def generate_line_formes() -> str:
    idForme  : str = give_not_used_key(idFormeDict)
    nomForme : str = _sql_string(random_element(pathWords))
    return construct_line(idForme, nomForme)

def generate_line_motifs() -> str:
    idMotif  : str = give_not_used_key(idMotifDict)
    nomMotif : str = _sql_string(random_element(pathWords))
    return construct_line(idMotif, nomMotif)

def generate_line_pieces() -> str:
    idPiece : str = give_not_used_key(idPieceDict)
    return construct_line(idPiece)

def generate_line_pieces_complexes() -> str:
    idPieceCo : str = give_not_used_key(idPieceCompDict)
    return construct_line(idPieceCo)

###############################################################################
#
#   STEP 2 : GENERATE AN ENTIRE TABLE
#
#   It is
#   temporarily stored in a file with the extension .data
#
###############################################################################

def store_a_line_in_table(pathFile : str, funcGenerationLine) -> None:
    """
    :param pathFile: the relative path where we want to store the table.
    :param funcGenerationLine: the function generate_line_* for the table,
                               callable without argument.

    Append one generated line, followed by ",\n", to the .data file. The
    file and its directory are created when they do not exist yet.
    """
    line : str = funcGenerationLine()

    directory : str = os.path.dirname(pathFile)
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(pathFile, 'a') as file:
        file.write(line + ",\n")

###############################################################################

def store_table(nbElements : int, nameTable : str, funcGenerationLine) -> None:
    """
    :param nbElements: the number of instances we want to get.
    :param nameTable: from `tablesDict.keys()`.
    :param funcGenerationLine: the function generate_line_* for the table.
    """
    pathFile : str = give_path_file(nameTable)
    for _ in range(nbElements):
        store_a_line_in_table(pathFile, funcGenerationLine)

###############################################################################

def generate_table(nbElements : int, nameTable : str, funcGen) -> None:
    """
    Store `nbElements` lines produced by `funcGen` for the table `nameTable`.
    """
    store_table(nbElements, nameTable, funcGen)

###############################################################################

def generate_table_using_quantite(nbElements : int, nameTable : str,
                                  funcGen) -> None:
    """
    :param nbElements: the number of lines to store; also the total that the
                       generated quantities add up to.
    :param nameTable: from `tablesDict.keys()`.
    :param funcGen: a function taking the quantity (as a string) and
                    returning the line to store.

    Every line but the last draws a quantity between 0 and a tenth of what
    is left; the last line takes the remainder.
    """
    total : int = nbElements

    for i in range(nbElements):
        if i < nbElements - 1:
            quantite : int = random.randint(0, total // 10)
        else:
            quantite = total
        total -= quantite

        # The quantity is bound as a default value so that the callable does
        # not depend on the loop variable.
        store_table(1, nameTable,
                    lambda quantite=quantite: funcGen(str(quantite)))

###############################################################################
#
#   STEP 3 : RESOLVE THE ISSUES OF DEPENDENCIES AND OTHERS
#
#   Just edits the .data files.
#
###############################################################################

def remove_duplicata(pathFile : str) -> None:
    """
    :param pathFile: a .data file.

    Rewrite the file keeping only the first occurrence of each line, in the
    original order. Does nothing when the file does not exist.
    """
    if not os.path.exists(pathFile):
        return

    with open(pathFile, "r") as infile:
        # dict.fromkeys keeps the first occurrence and the original order.
        uniqueLines : list = list(dict.fromkeys(infile))

    # Written next to the target, then swapped in, so that the .data file is
    # never left half-written.
    tempPath : str = pathFile + ".tmp"
    with open(tempPath, "w") as outfile:
        outfile.writelines(uniqueLines)
    os.replace(tempPath, pathFile)

###############################################################################
#
#   STEP 4 : CONVERSION TO PostgreSQL
#
###############################################################################

pathInsertSql : str = "src/insert.sql"

def convert_table_to_sql(pathFile : str, nameTable : str,
                         pathSql : str = pathInsertSql) -> None:
    """
    :param pathFile: the relative path to the .data file which contains
                     the table.
    :param nameTable: the name of the table.
    :param pathSql: the sql file to append to, `src/insert.sql` by default.

    Append an `INSERT INTO nameTable VALUES ...;` statement to the sql file.
    Nothing is written when the .data file is missing or holds no line,
    since an INSERT without values is not valid SQL.
    """
    if not os.path.exists(pathFile):
        return

    with open(pathFile, 'r') as fileData:
        rows : list = [ line.rstrip("\n") for line in fileData
                        if line.strip() ]
    if not rows:
        return

    # Every stored line ends with ','; the last one must end the statement.
    values : str = "\n".join(rows)
    if values.endswith(","):
        values = values[:-1]

    with open(pathSql, 'a') as fileSql:  # append mode.
        fileSql.write("INSERT INTO " + nameTable + " VALUES\n")
        fileSql.write(values + ";\n")
        fileSql.write("\n")

###############################################################################

tablesDict : dict = {
    "membres"          : generate_line_membres,
    "couleurs"         : generate_line_couleurs,
    "marques"          : generate_line_marques,
    "tags"             : generate_line_tags,
    "formes"           : generate_line_formes,
    "motifs"           : generate_line_motifs,
    "pieces"           : generate_line_pieces,
    "pieces_complexes" : generate_line_pieces_complexes,
    "modeles"          : generate_line_modeles,
    "boites"           : generate_line_boites,
    "fils"             : generate_line_fils,
    "messages"         : generate_line_messages,
    "illustrations"    : generate_line_illustrations,
    "acheter"          : generate_line_acheter,
    "avoir_motif"      : generate_line_avoir_motif,
    "avoir_tag"        : generate_line_avoir_tag,
    "colorer"          : generate_line_colorer,
    "construire"       : generate_line_construire,
    "contenir"         : generate_line_contenir,
    "enregistrer"      : generate_line_enregistrer,
    "etre"             : generate_line_etre,
    "etre_complexe"    : generate_line_etre_complexe,
    "etre_forme"       : generate_line_etre_forme,
    "necessiter"       : generate_line_necessiter,
    "noter"            : generate_line_noter,
    "perdre"           : generate_line_perdre,
    "varier"           : generate_line_varier,
}

###############################################################################

def give_path_file(nameTable : str) -> str:
    """ Return the path of the .data file of the table `nameTable`. """
    repertory : str = ".data/"
    return repertory + nameTable + ".data"

###############################################################################

def main() -> None:
    """
    Generate every table into its .data file, remove the duplicated lines,
    then write the whole `insert.sql` file.
    """
    # The .data files are written in append mode while the id*Dict tables
    # start empty at each run: lines left by a previous run would come back
    # with primary keys that are drawn again.
    for nameTable in tablesDict:
        pathData : str = give_path_file(nameTable)
        if os.path.exists(pathData):
            os.remove(pathData)

    ###########################################################################
    #
    #   The tables that have no foreign keys.
    #
    ###########################################################################

    nbMembres : int = 101
    store_table(nbMembres, "membres", tablesDict["membres"])

    nbCouleurs : int = 21
    store_table(nbCouleurs, "couleurs", tablesDict["couleurs"])

    nbMarques : int = 10
    store_table(nbMarques, "marques", tablesDict["marques"])

    nbTags : int = 69
    store_table(nbTags, "tags", tablesDict["tags"])

    nbFormes : int = 69
    store_table(nbFormes, "formes", tablesDict["formes"])

    nbMotifs : int = 100
    store_table(nbMotifs, "motifs", tablesDict["motifs"])

    nbPieces : int = 750
    store_table(nbPieces, "pieces", tablesDict["pieces"])

    nbPiecesComp : int = 250
    store_table(nbPiecesComp, "pieces_complexes",
                tablesDict["pieces_complexes"])

    ###########################################################################
    #
    #   The tables that are needed for the verb tables.
    #
    ###########################################################################

    nbModeles : int = 10
    funcGen = lambda : tablesDict["modeles"](
        give_used_key(idMembreDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbModeles, "modeles", funcGen)

    nbBoites : int = 20
    funcGen = lambda : tablesDict["boites"](
        give_used_key(idMarqueDict)
    )
    generate_table(nbBoites, "boites", funcGen)

    nbFils : int = 100
    funcGen = lambda : tablesDict["fils"](
        give_used_key(idModeleDict)
    )
    generate_table(nbFils, "fils", funcGen)

    nbMessages : int = 300
    funcGen = lambda : tablesDict["messages"](
        give_used_key(idMembreDict),
        give_used_key(idFilDict),
        give_used_key(idMessageDict)
    )
    generate_table(nbMessages, "messages", funcGen)

    nbIllustrations : int = nbModeles
    funcGen = lambda : tablesDict["illustrations"](
        give_used_key(idModeleDict)
    )
    generate_table(nbIllustrations, "illustrations", funcGen)

    ###########################################################################
    #
    #   The verb tables.
    #
    ###########################################################################

    nbAcheter : int = min(nbMembres, nbPieces)
    funcGen = lambda x: tablesDict["acheter"](
        give_used_key(idMembreDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbAcheter, "acheter", funcGen)

    nbAvoirMotif : int = min(nbPieces, nbMotifs)
    funcGen = lambda : tablesDict["avoir_motif"](
        give_used_key(idPieceDict),
        give_used_key(idMotifDict)
    )
    generate_table(nbAvoirMotif, "avoir_motif", funcGen)

    nbAvoirTag : int = min(nbTags, nbBoites)
    funcGen = lambda : tablesDict["avoir_tag"](
        give_used_key(idTagDict),
        give_used_key(idBoiteDict)
    )
    generate_table(nbAvoirTag, "avoir_tag", funcGen)

    nbColorer : int = min(nbPieces, nbCouleurs)
    funcGen = lambda : tablesDict["colorer"](
        give_used_key(idPieceDict),
        give_used_key(idCouleurDict)
    )
    generate_table(nbColorer, "colorer", funcGen)

    nbConstruire : int = min(nbBoites, nbModeles)
    funcGen = lambda : tablesDict["construire"](
        give_used_key(idBoiteDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbConstruire, "construire", funcGen)

    nbContenir : int = min(nbBoites, nbPieces)
    funcGen = lambda x: tablesDict["contenir"](
        give_used_key(idBoiteDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbContenir, "contenir", funcGen)

    nbEnregistrer : int = min(nbBoites, nbMembres)
    funcGen = lambda x: tablesDict["enregistrer"](
        give_used_key(idBoiteDict),
        give_used_key(idMembreDict),
        x
    )
    generate_table_using_quantite(nbEnregistrer, "enregistrer", funcGen)

    nbEtre : int = nbPieces - nbPieces // 4
    funcGen = lambda : tablesDict["etre"](
        give_used_key(idPieceDict),
        give_used_key(idMarqueDict)
    )
    generate_table(nbEtre, "etre", funcGen)

    nbEtreComp : int = nbPieces - nbEtre
    funcGen = lambda : tablesDict["etre_complexe"](
        give_used_key(idPieceDict),
        give_used_key(idPieceCompDict)
    )
    generate_table(nbEtreComp, "etre_complexe", funcGen)

    nbEtreForme : int = nbPieces
    funcGen = lambda : tablesDict["etre_forme"](
        give_used_key(idFormeDict),
        give_used_key(idPieceDict)
    )
    generate_table(nbEtreForme, "etre_forme", funcGen)

    nbNecessiter : int = min(nbModeles, nbPieces)
    funcGen = lambda x: tablesDict["necessiter"](
        give_used_key(idModeleDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbNecessiter, "necessiter", funcGen)

    nbNoter : int = min(nbModeles, nbMembres)
    funcGen = lambda : tablesDict["noter"](
        give_used_key(idModeleDict),
        give_used_key(idMembreDict)
    )
    generate_table(nbNoter, "noter", funcGen)

    nbPerdre : int = nbPieces
    funcGen = lambda x: tablesDict["perdre"](
        give_used_key(idMembreDict),
        give_used_key(idBoiteDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbPerdre, "perdre", funcGen)

    nbVarier : int = nbModeles
    funcGen = lambda : tablesDict["varier"](
        give_used_key(idModeleDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbVarier, "varier", funcGen)

    ###########################################################################
    #
    #   To write `insert.sql`.
    #
    ###########################################################################

    # Remove duplicatas.
    for nameTable in tablesDict:
        remove_duplicata(give_path_file(nameTable))

    # Writing in `insert.sql` : start from an empty file, then one INSERT
    # statement per table is appended.
    with open(pathInsertSql, 'w') as fileSql:
        fileSql.write("TRUNCATE " + ', '.join(tablesDict) + ";\n")
        fileSql.write("\n")

    for nameTable in tablesDict:
        convert_table_to_sql(give_path_file(nameTable), nameTable)

###############################################################################

if __name__ == '__main__':
    main()

###############################################################################