lego/PostgreSQL/generate_data.py
2024-11-26 10:25:14 +01:00

629 lines
22 KiB
Python

###############################################################################
#
# PYTHON SCRIPT TO RANDOMLY GENERATE DATA
# STEPS :
# 1 --> Generate a line of a table.
# 2 --> Generate an entire table.
# 3 --> Resolve the issues of dependencies and others.
# 4 --> Convert the created tables to the PostgreSQL code.
# Bonus --> Enjoy !
#
###############################################################################
import os
import random
import csv
###############################################################################
#
# HERE IS ALL THE RAW DATA
#
# These are just CSV files, each containing a single column.
#
###############################################################################
# Paths to the raw single-column CSV files (relative to the working directory).
# Candidate integers used to seed every primary-key pool.
pathIntegers : str = ".csv/row_numbers.csv"
# Text used for message contents and for notes.
pathSentences : str = ".csv/sentences.csv"
# URLs used for illustrations and for model notices.
pathUrls : str = ".csv/urls.csv"
# Words used for box titles and for tag / forme / motif names.
pathWords : str = ".csv/words.csv"
# Names given to members.
pathNames : str = ".csv/names.csv"
# Names given to models.
pathNameModels : str = ".csv/name_models.csv"
# Dates given to boxes.
pathDates : str = ".csv/dates.csv"
# Names given to colors.
pathColors : str = ".csv/colors.csv"
# Names given to brands.
pathBrands : str = ".csv/brands.csv"
###############################################################################
#
# TO KNOW WHICH NUMBER IS USED FOR A PRIMARY KEY
#
###############################################################################
def initialize_dict_primary_key(pathFile: str) -> dict:
    """
    Build the pool of candidate primary keys, all marked as unused.

    :param pathFile: path of a single-column CSV file of integers whose first
                     line is a header (in practice `pathIntegers`).
    :return: a dict { key (int) : 0 } with one entry per data row.
    """
    # newline='' is what the csv module expects for files given to reader().
    with open(pathFile, 'r', newline='') as csvFile:
        rows = csv.reader(csvFile)
        next(rows, None)  # Skip the header line (if any).
        # Blank lines are parsed as empty rows: ignore them instead of
        # failing on row[0].
        return {int(row[0]): 0 for row in rows if row}
###############################################################################
# Each id*Dict has the shape { X (int) : 1 or 0 }: 1 when the key X is already
# used by a generated row, 0 when it is still free.
# The key file is parsed only once; every table then receives its own
# independent copy of the all-free pool.
_unusedKeyPool: dict = initialize_dict_primary_key(pathIntegers)
idMembreDict: dict = dict(_unusedKeyPool)
idBoiteDict: dict = dict(_unusedKeyPool)
idCouleurDict: dict = dict(_unusedKeyPool)
idFilDict: dict = dict(_unusedKeyPool)
idFormeDict: dict = dict(_unusedKeyPool)
idIllustrDict: dict = dict(_unusedKeyPool)
idMarqueDict: dict = dict(_unusedKeyPool)
idMessageDict: dict = dict(_unusedKeyPool)
idModeleDict: dict = dict(_unusedKeyPool)
idMotifDict: dict = dict(_unusedKeyPool)
idPieceDict: dict = dict(_unusedKeyPool)
idPieceCompDict: dict = dict(_unusedKeyPool)
idTagDict: dict = dict(_unusedKeyPool)
###############################################################################
#
# STEP 1 : GENERATE A LINE OF A TABLE
#
###############################################################################
def give_used_key(idDict: dict) -> str:
    """
    Pick at random a key that is already in use.

    :param idDict: one of the id*Dict pools.
    :return: a key X such that idDict[X] == 1, as a string, or '' when no
             key of the pool is used yet.
    """
    usedKeys = [key for key, flag in idDict.items() if flag == 1]
    # The associated table has no instances yet.
    if not usedKeys:
        return ''
    position = random.randrange(len(usedKeys))
    return str(usedKeys[position])
###############################################################################
def give_not_used_key(idDict: dict) -> str:
    """
    Reserve at random a key that is still free.

    :param idDict: one of the id*Dict pools.
    :return: a key X such that idDict[X] == 0, as a string, or '' when every
             key of the pool is already used.
    The chosen key is marked as used in the pool : 0 --> 1.
    """
    freeKeys = [key for key, flag in idDict.items() if flag == 0]
    # Nothing left to hand out.
    if not freeKeys:
        return ''
    picked = random.choice(freeKeys)
    idDict[picked] = 1
    return str(picked)
###############################################################################
#
# USED FOR NOT PRIMARY KEY VARIABLES
#
###############################################################################
# Data rows of every CSV file already read, keyed by path, so that a file is
# parsed once instead of once per generated value.
_csvRowsCache: dict = {}


def random_element(pathFile: str) -> str:
    """
    Return a random value from a single-column CSV file.

    :param pathFile: the relative path of the csv file to read; its first
                     line is treated as a header and never returned.
    :return: the first field of a random data row, or "" when that row is
             a blank line.
    :raises ValueError: when the file contains no data row.
    """
    rows = _csvRowsCache.get(pathFile)
    if rows is None:
        with open(pathFile, 'r', newline='') as file:
            rows = list(csv.reader(file))[1:]
        _csvRowsCache[pathFile] = rows
    if not rows:
        raise ValueError("no data row in " + pathFile)
    row = random.choice(rows)
    return row[0] if row else ""
###############################################################################
def construct_line(*args: str) -> str:
    """
    Assemble already-formatted SQL values into one row of a VALUES list.

    :param *args: the values of the row, each one a string.
                  example : "1", "'toto'", "'Lorem PIPsum'", "42".
    :return: a line to give to the sql.
             example : "(1, 'toto', 'Lorem PIPsum', 42)".
    """
    return "(" + ', '.join(args) + ")"
###############################################################################
def generate_line_acheter(idMembre: str, idPiece: str, quantite: str) -> str:
    """Build a row of `acheter` : (idMembre, idPiece, quantite)."""
    values = [idMembre, idPiece, quantite]
    return construct_line(*values)
def generate_line_avoir_motif(idPiece: str, idMotif: str) -> str:
    """Build a row of `avoir_motif` : (idPiece, idMotif)."""
    values = [idPiece, idMotif]
    return construct_line(*values)
def generate_line_avoir_tag(idTag: str, idBoite: str) -> str:
    """Build a row of `avoir_tag` : (idTag, idBoite)."""
    values = [idTag, idBoite]
    return construct_line(*values)
def generate_line_boites(idMarque: str) -> str:
    """
    Build a row of `boites` : (idBoite, titre, dateBoite, idMarque).

    :param idMarque: the key of the brand the box refers to.
    :return: the row, with a freshly reserved idBoite and a random title
             and date.
    """
    idBoite: str = give_not_used_key(idBoiteDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    titre: str = "'" + random_element(pathWords).replace("'", "''") + "'"
    dateBoite: str = "'" + random_element(pathDates).replace("'", "''") + "'"
    return construct_line(idBoite, titre, dateBoite, idMarque)
def generate_line_colorer(idPiece: str, idCouleur: str) -> str:
    """Build a row of `colorer` : (idPiece, idCouleur)."""
    values = [idPiece, idCouleur]
    return construct_line(*values)
def generate_line_construire(idBoite: str, idModele: str) -> str:
    """Build a row of `construire` : (idBoite, idModele)."""
    values = [idBoite, idModele]
    return construct_line(*values)
def generate_line_contenir(idBoite: str, idPiece: str, quantite: str) -> str:
    """Build a row of `contenir` : (idBoite, idPiece, quantite)."""
    values = [idBoite, idPiece, quantite]
    return construct_line(*values)
def generate_line_couleurs() -> str:
    """
    Build a row of `couleurs` : (idCouleur, nomCouleur).

    :return: the row, with a freshly reserved idCouleur and a random name.
    """
    idCouleur: str = give_not_used_key(idCouleurDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomCouleur: str = "'" + random_element(pathColors).replace("'", "''") + "'"
    return construct_line(idCouleur, nomCouleur)
def generate_line_enregistrer(idBoite: str, idMembre: str,
                              quantite: str) -> str:
    """Build a row of `enregistrer` : (idBoite, idMembre, quantite)."""
    values = [idBoite, idMembre, quantite]
    return construct_line(*values)
def generate_line_etre(idPiece: str, idMarque: str) -> str:
    """Build a row of `etre` : (idPiece, idMarque)."""
    values = [idPiece, idMarque]
    return construct_line(*values)
def generate_line_etre_complexe(idPiece: str, idPieceComp: str) -> str:
    """Build a row of `etre_complexe` : (idPiece, idPieceComp)."""
    values = [idPiece, idPieceComp]
    return construct_line(*values)
def generate_line_etre_forme(idForme: str, idPiece: str) -> str:
    """Build a row of `etre_forme` : (idForme, idPiece)."""
    values = [idForme, idPiece]
    return construct_line(*values)
def generate_line_fils(idModele: str) -> str:
    """Build a row of `fils` : (idFil, idModele), reserving a new idFil."""
    return construct_line(give_not_used_key(idFilDict), idModele)
def generate_line_illustrations(idModele: str) -> str:
    """
    Build a row of `illustrations` : (idIllustr, urlIllustr, idModele).

    :param idModele: the key of the model the illustration refers to.
    :return: the row, with a freshly reserved idIllustr and a random url.
    """
    idIllustr: str = give_not_used_key(idIllustrDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    urlIllustr: str = "'" + random_element(pathUrls).replace("'", "''") + "'"
    return construct_line(idIllustr, urlIllustr, idModele)
def generate_line_marques() -> str:
    """
    Build a row of `marques` : (idMarque, nomMarque).

    :return: the row, with a freshly reserved idMarque and a random name.
    """
    idMarque: str = give_not_used_key(idMarqueDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomMarque: str = "'" + random_element(pathBrands).replace("'", "''") + "'"
    return construct_line(idMarque, nomMarque)
def generate_line_membres() -> str:
    """
    Build a row of `membres` : (idMembre, nomMembre).

    :return: the row, with a freshly reserved idMembre and a random name.
    """
    idMembre: str = give_not_used_key(idMembreDict)
    # Embedded single quotes (e.g. in a name) are doubled so the text stays
    # a valid SQL string literal.
    nomMembre: str = "'" + random_element(pathNames).replace("'", "''") + "'"
    return construct_line(idMembre, nomMembre)
def generate_line_messages(idMembre: str, idFil: str,
                           idMessage2: str) -> str:
    """
    Build a row of `messages` :
    (idMessage, contenu, idMembre, idFil, idMessage2).

    :param idMembre: the key of the member referenced by the row.
    :param idFil: the key of the thread referenced by the row.
    :param idMessage2: the key of another message, or '' when none exists
                       yet.
    :return: the row, with a freshly reserved idMessage and random content.
    """
    idMessage: str = give_not_used_key(idMessageDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    contenu: str = "'" + random_element(pathSentences).replace("'", "''") + "'"
    # When the first message is declared, it references itself.
    if idMessage2 == '':
        idMessage2 = idMessage
    return construct_line(idMessage, contenu, idMembre, idFil, idMessage2)
def generate_line_modeles(idMembre: str, idModeleEte: str) -> str:
    """
    Build a row of `modeles` :
    (idModele, nomModele, urlNotice, idMembre, idModeleEte).

    :param idMembre: the key of the member referenced by the row.
    :param idModeleEte: the key of another model, or '' when none exists
                        yet.
    :return: the row, with a freshly reserved idModele and a random name
             and notice url.
    """
    idModele: str = give_not_used_key(idModeleDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomModele: str = ("'" + random_element(pathNameModels).replace("'", "''")
                      + "'")
    urlNotice: str = "'" + random_element(pathUrls).replace("'", "''") + "'"
    # When the first modele is declared, it references itself.
    if idModeleEte == '':
        idModeleEte = idModele
    return construct_line(idModele, nomModele, urlNotice, idMembre,
                          idModeleEte)
def generate_line_necessiter(idModele: str, idPiece: str,
                             quantite: str) -> str:
    """Build a row of `necessiter` : (idModele, idPiece, quantite)."""
    values = [idModele, idPiece, quantite]
    return construct_line(*values)
def generate_line_noter(idModele: str, idMembre: str) -> str:
    """
    Build a row of `noter` : (idModele, idMembre, note).

    :param idModele: the key of the model referenced by the row.
    :param idMembre: the key of the member referenced by the row.
    :return: the row, whose note is a random sentence.
    """
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    note: str = "'" + random_element(pathSentences).replace("'", "''") + "'"
    return construct_line(idModele, idMembre, note)
def generate_line_perdre(idMembre: str, idBoite: str,
                         idPiece: str, quantite: str) -> str:
    """Build a row of `perdre` : (idMembre, idBoite, idPiece, quantite)."""
    values = [idMembre, idBoite, idPiece, quantite]
    return construct_line(*values)
def generate_line_tags() -> str:
    """
    Build a row of `tags` : (idTag, nomTag).

    :return: the row, with a freshly reserved idTag and a random name.
    """
    idTag: str = give_not_used_key(idTagDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomTag: str = "'" + random_element(pathWords).replace("'", "''") + "'"
    return construct_line(idTag, nomTag)
def generate_line_varier(idModele_1: str, idModele_et: str) -> str:
    """Build a row of `varier` : (idModele_1, idModele_et)."""
    values = [idModele_1, idModele_et]
    return construct_line(*values)
def generate_line_formes() -> str:
    """
    Build a row of `formes` : (idForme, nomForme).

    :return: the row, with a freshly reserved idForme and a random name.
    """
    idForme: str = give_not_used_key(idFormeDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomForme: str = "'" + random_element(pathWords).replace("'", "''") + "'"
    return construct_line(idForme, nomForme)
def generate_line_motifs() -> str:
    """
    Build a row of `motifs` : (idMotif, nomMotif).

    :return: the row, with a freshly reserved idMotif and a random name.
    """
    idMotif: str = give_not_used_key(idMotifDict)
    # Embedded single quotes are doubled so the text stays a valid SQL
    # string literal.
    nomMotif: str = "'" + random_element(pathWords).replace("'", "''") + "'"
    return construct_line(idMotif, nomMotif)
def generate_line_pieces() -> str:
    """Build a row of `pieces` : (idPiece), reserving a new idPiece."""
    return construct_line(give_not_used_key(idPieceDict))
def generate_line_pieces_complexes() -> str:
    """Build a row of `pieces_complexes`, reserving a new key for it."""
    return construct_line(give_not_used_key(idPieceCompDict))
###############################################################################
#
# STEP 2 : GENERATE AN ENTIRE TABLE
#
# It is temporarily stored in a file with the extension .data
#
###############################################################################
def store_a_line_in_table(pathFile: str, funcGenerationLine) -> None:
    """
    Append one generated row to a .data file.

    :param pathFile: the relative path where we want to store the table.
    :param funcGenerationLine: a zero-argument callable returning the row,
                               e.g. a generate_line_* function.
    The row is written followed by ",\\n".
    """
    # Append mode : the file is created when it does not exist yet.
    with open(pathFile, 'a+') as dataFile:
        row = funcGenerationLine()
        dataFile.write(row + ",\n")
###############################################################################
def store_table(nbElements: int, nameTable: str, funcGenerationLine) -> None:
    """
    Append `nbElements` generated rows to the .data file of a table.

    :param nbElements: the number of instances we want to get.
    :param nameTable: the name of the table, as found in `tablesDict`.
    :param funcGenerationLine: a zero-argument callable returning one row.
    """
    written = 0
    while written < nbElements:
        store_a_line_in_table(give_path_file(nameTable), funcGenerationLine)
        written += 1
###############################################################################
def generate_table(nbElements: int, nameTable: str, funcGen) -> None:
    """
    Generate `nbElements` rows of a table, one row at a time.

    :param nbElements: the number of rows to generate.
    :param nameTable: the name of the table.
    :param funcGen: a zero-argument callable returning one row.
    """
    remaining = nbElements
    while remaining > 0:
        store_table(1, nameTable, funcGen)
        remaining -= 1
###############################################################################
def generate_table_using_quantite(nbElements: int, nameTable: str,
                                  funcGen) -> None:
    """
    Generate `nbElements` rows of a table that has a `quantite` column.

    :param nbElements: the number of rows to generate; it is also the total
                       that the generated quantities add up to.
    :param nameTable: the name of the table.
    :param funcGen: a callable taking the quantity (as a string) and
                    returning one row.
    Every row but the last draws a quantity between 0 and a tenth of what is
    left; the last row receives whatever remains.
    """
    remaining: int = nbElements
    for i in range(nbElements):
        # The last index is nbElements - 1 : comparing against nbElements
        # would never select the "remainder" branch.
        if i < nbElements - 1:
            quantite: int = random.randint(0, remaining // 10)
        else:
            quantite = remaining
        remaining -= quantite
        # The quantity is bound at definition time so the callable does not
        # depend on later iterations.
        store_table(1, nameTable, lambda q=quantite: funcGen(str(q)))
###############################################################################
#
# STEP 3 : RESOLVE THE ISSUES OF DEPENDENCIES AND OTHERS
#
# Just edits the .data files.
#
###############################################################################
def remove_duplicata(pathFile: str) -> None:
    """
    Remove the duplicated lines of a file, keeping the first occurrence of
    each line and the original order.

    :param pathFile: a .data file; it is rewritten in place.
    """
    # The temporary file sits next to the target so that the final swap
    # stays on the same filesystem.
    tempPath: str = pathFile + ".tmp"
    linesSeen = set()  # holds lines already seen.
    try:
        with open(pathFile, "r") as infile, open(tempPath, "w") as outfile:
            for line in infile:
                if line not in linesSeen:  # not a duplicate.
                    linesSeen.add(line)
                    outfile.write(line)
        # os.replace overwrites an existing target on every platform.
        os.replace(tempPath, pathFile)
    finally:
        # Only still present when something failed before the swap.
        if os.path.exists(tempPath):
            os.remove(tempPath)
###############################################################################
#
# STEP 4 : CONVERSION TO PostgreSQL
#
###############################################################################
def convert_table_to_sql(pathFile: str, nameTable: str,
                         pathSql: str = "src/insert.sql") -> None:
    """
    Append the INSERT statement of one table to the sql file.

    :param pathFile: the relative path to the .data file
                     which contains the table.
    :param nameTable: the name of the table.
    :param pathSql: the sql file to append to (`src/insert.sql` by default).
    Nothing is written when the .data file is missing or holds no row, since
    an INSERT without any value would not be valid SQL.
    """
    rows: list = []
    if os.path.exists(pathFile):
        with open(pathFile, 'r') as fileData:
            rows = [line.rstrip("\n") for line in fileData if line.strip()]
    if not rows:
        return
    # Each stored row ends with ',' : the last one must end the statement
    # instead, so its trailing comma is dropped before ';' is appended.
    lastRow: str = rows[-1].rstrip()
    if lastRow.endswith(","):
        lastRow = lastRow[:-1]
    rows[-1] = lastRow
    with open(pathSql, 'a') as fileSql:  # append mode.
        fileSql.write("INSERT INTO " + nameTable + " VALUES\n")
        fileSql.write("\n".join(rows) + ";\n")
        fileSql.write("\n")
###############################################################################
# Maps each table name to the generate_line_* function that builds its rows.
# The insertion order matters : the main section iterates over this dict to
# build the TRUNCATE list and to emit the INSERT statements.
tablesDict : dict = {
# Tables generated first, without foreign keys.
"membres" : generate_line_membres,
"couleurs" : generate_line_couleurs,
"marques" : generate_line_marques,
"tags" : generate_line_tags,
"formes" : generate_line_formes,
"motifs" : generate_line_motifs,
"pieces" : generate_line_pieces,
"pieces_complexes" : generate_line_pieces_complexes,
# Tables whose rows take keys of the tables above as arguments.
"modeles" : generate_line_modeles,
"boites" : generate_line_boites,
"fils" : generate_line_fils,
"messages" : generate_line_messages,
"illustrations" : generate_line_illustrations,
# The verb tables.
"acheter" : generate_line_acheter,
"avoir_motif" : generate_line_avoir_motif,
"avoir_tag" : generate_line_avoir_tag,
"colorer" : generate_line_colorer,
"construire" : generate_line_construire,
"contenir" : generate_line_contenir,
"enregistrer" : generate_line_enregistrer,
"etre" : generate_line_etre,
"etre_complexe" : generate_line_etre_complexe,
"etre_forme" : generate_line_etre_forme,
"necessiter" : generate_line_necessiter,
"noter" : generate_line_noter,
"perdre" : generate_line_perdre,
"varier" : generate_line_varier
}
###############################################################################
def give_path_file(nameTable: str) -> str:
    """
    :param nameTable: the name of a table.
    :return: the relative path of the .data file that stores this table.
    """
    return "".join((".data/", nameTable, ".data"))
###############################################################################
if __name__ == '__main__':
    ###########################################################################
    #
    # Start from a clean state.
    #
    # Rows are appended to the .data files while the key pools start empty on
    # every run : leftovers of a previous run would be mixed with new rows.
    #
    ###########################################################################
    os.makedirs(os.path.dirname(give_path_file("membres")), exist_ok=True)
    os.makedirs("src", exist_ok=True)
    for nameTable in tablesDict:
        if os.path.exists(give_path_file(nameTable)):
            os.remove(give_path_file(nameTable))
    ###########################################################################
    #
    # The tables that have no foreign keys.
    #
    ###########################################################################
    nbMembres: int = 101
    store_table(nbMembres, "membres", tablesDict["membres"])
    nbCouleurs: int = 21
    store_table(nbCouleurs, "couleurs", tablesDict["couleurs"])
    nbMarques: int = 10
    store_table(nbMarques, "marques", tablesDict["marques"])
    nbTags: int = 69
    store_table(nbTags, "tags", tablesDict["tags"])
    nbFormes: int = 69
    store_table(nbFormes, "formes", tablesDict["formes"])
    nbMotifs: int = 100
    store_table(nbMotifs, "motifs", tablesDict["motifs"])
    nbPieces: int = 750
    store_table(nbPieces, "pieces", tablesDict["pieces"])
    nbPiecesComp: int = 250
    store_table(nbPiecesComp, "pieces_complexes",
                tablesDict["pieces_complexes"])
    ###########################################################################
    #
    # The tables that are needed for the verb tables.
    #
    # Every foreign key is drawn among the keys already used by the
    # referenced table, hence the generation order.
    #
    ###########################################################################
    nbModeles: int = 10
    funcGen = lambda: tablesDict["modeles"](
        give_used_key(idMembreDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbModeles, "modeles", funcGen)
    nbBoites: int = 20
    funcGen = lambda: tablesDict["boites"](
        give_used_key(idMarqueDict)
    )
    generate_table(nbBoites, "boites", funcGen)
    nbFils: int = 100
    funcGen = lambda: tablesDict["fils"](
        give_used_key(idModeleDict)
    )
    generate_table(nbFils, "fils", funcGen)
    nbMessages: int = 300
    funcGen = lambda: tablesDict["messages"](
        give_used_key(idMembreDict),
        give_used_key(idFilDict),
        give_used_key(idMessageDict)
    )
    generate_table(nbMessages, "messages", funcGen)
    nbIllustrations: int = nbModeles
    funcGen = lambda: tablesDict["illustrations"](
        give_used_key(idModeleDict)
    )
    generate_table(nbIllustrations, "illustrations", funcGen)
    ###########################################################################
    #
    # The verb tables.
    #
    ###########################################################################
    nbAcheter: int = min(nbMembres, nbPieces)
    funcGen = lambda x: tablesDict["acheter"](
        give_used_key(idMembreDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbAcheter, "acheter", funcGen)
    nbAvoirMotif: int = min(nbPieces, nbMotifs)
    funcGen = lambda: tablesDict["avoir_motif"](
        give_used_key(idPieceDict),
        give_used_key(idMotifDict)
    )
    generate_table(nbAvoirMotif, "avoir_motif", funcGen)
    nbAvoirTag: int = min(nbTags, nbBoites)
    funcGen = lambda: tablesDict["avoir_tag"](
        give_used_key(idTagDict),
        give_used_key(idBoiteDict)
    )
    generate_table(nbAvoirTag, "avoir_tag", funcGen)
    nbColorer: int = min(nbPieces, nbCouleurs)
    funcGen = lambda: tablesDict["colorer"](
        give_used_key(idPieceDict),
        give_used_key(idCouleurDict)
    )
    generate_table(nbColorer, "colorer", funcGen)
    nbConstruire: int = min(nbBoites, nbModeles)
    funcGen = lambda: tablesDict["construire"](
        give_used_key(idBoiteDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbConstruire, "construire", funcGen)
    nbContenir: int = min(nbBoites, nbPieces)
    funcGen = lambda x: tablesDict["contenir"](
        give_used_key(idBoiteDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbContenir, "contenir", funcGen)
    nbEnregistrer: int = min(nbBoites, nbMembres)
    funcGen = lambda x: tablesDict["enregistrer"](
        give_used_key(idBoiteDict),
        give_used_key(idMembreDict),
        x
    )
    generate_table_using_quantite(nbEnregistrer, "enregistrer", funcGen)
    nbEtre: int = nbPieces - nbPieces // 4
    funcGen = lambda: tablesDict["etre"](
        give_used_key(idPieceDict),
        give_used_key(idMarqueDict)
    )
    generate_table(nbEtre, "etre", funcGen)
    nbEtreComp: int = nbPieces - nbEtre
    funcGen = lambda: tablesDict["etre_complexe"](
        give_used_key(idPieceDict),
        give_used_key(idPieceCompDict)
    )
    generate_table(nbEtreComp, "etre_complexe", funcGen)
    nbEtreForme: int = nbPieces
    funcGen = lambda: tablesDict["etre_forme"](
        give_used_key(idFormeDict),
        give_used_key(idPieceDict)
    )
    generate_table(nbEtreForme, "etre_forme", funcGen)
    nbNecessiter: int = min(nbModeles, nbPieces)
    funcGen = lambda x: tablesDict["necessiter"](
        give_used_key(idModeleDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbNecessiter, "necessiter", funcGen)
    nbNoter: int = min(nbModeles, nbMembres)
    funcGen = lambda: tablesDict["noter"](
        give_used_key(idModeleDict),
        give_used_key(idMembreDict)
    )
    generate_table(nbNoter, "noter", funcGen)
    nbPerdre: int = nbPieces
    funcGen = lambda x: tablesDict["perdre"](
        give_used_key(idMembreDict),
        give_used_key(idBoiteDict),
        give_used_key(idPieceDict),
        x
    )
    generate_table_using_quantite(nbPerdre, "perdre", funcGen)
    nbVarier: int = nbModeles
    funcGen = lambda: tablesDict["varier"](
        give_used_key(idModeleDict),
        give_used_key(idModeleDict)
    )
    generate_table(nbVarier, "varier", funcGen)
    ###########################################################################
    #
    # To write `insert.sql`.
    #
    ###########################################################################
    # Remove duplicatas.
    for nameTable in tablesDict:
        remove_duplicata(give_path_file(nameTable))
    # Writing in `insert.sql` : the file is recreated with the TRUNCATE
    # statement, then each table appends its own INSERT statement.
    with open("src/insert.sql", 'w') as fileSql:
        fileSql.write("TRUNCATE " + ', '.join(tablesDict) + ";\n")
        fileSql.write("\n")
    for nameTable in tablesDict:
        convert_table_to_sql(give_path_file(nameTable), nameTable)
###############################################################################