@@ -176,7 +176,7 @@ def _check_multitable_spec(ds_spec):
176
176
)
177
177
178
178
179
def table_name_of_path(table_path):
    """Return the table name encoded in a data path.

    The name is the last fragment of the path once it is split on "/".

    Parameters
    ----------
    table_path : str
        Data path of a table, with fragments separated by "/".

    Returns
    -------
    str
        The text after the last "/" of ``table_path``, or ``table_path``
        itself when it contains no "/". The result is an empty string when
        the path is empty or ends with "/".
    """
    # Only the last fragment is needed, so split at most once from the right
    return table_path.rsplit("/", 1)[-1]


# Backward-compatible alias: this helper was previously named with a leading
# underscore, keep that name usable for any remaining caller.
_table_name_of_path = table_name_of_path
181
181
182
182
@@ -387,7 +387,6 @@ def __init__(self, X, y=None, categorical_target=True):
387
387
# Initialize members
388
388
self .main_table = None
389
389
self .additional_data_tables = None
390
- self .relations = None
391
390
self .categorical_target = categorical_target
392
391
self .target_column = None
393
392
self .target_column_id = None
@@ -437,7 +436,8 @@ def __init__(self, X, y=None, categorical_target=True):
437
436
# Index the tables by name
438
437
self ._tables_by_name = {
439
438
table .name : table
440
- for table in [self .main_table ] + self .additional_data_tables
439
+ for table in [self .main_table ]
440
+ + [table for _ , table , _ in self .additional_data_tables ]
441
441
}
442
442
443
443
# Post-conditions
@@ -513,32 +513,21 @@ def _init_tables_from_mapping(self, X):
513
513
key = main_table_key ,
514
514
)
515
515
self .additional_data_tables = []
516
- self .relations = []
517
516
if "additional_data_tables" in X :
518
517
for table_path , table_spec in X ["additional_data_tables" ].items ():
519
518
table_source , table_key = table_spec [:2 ]
520
- table_name = _table_name_of_path (table_path )
519
+ table_name = table_name_of_path (table_path )
521
520
table = PandasTable (
522
521
table_name ,
523
522
table_source ,
524
- data_path = table_path ,
525
523
key = table_key ,
526
524
)
527
- self .additional_data_tables .append (table )
528
525
is_one_to_one_relation = False
529
526
if len (table_spec ) == 3 and table_spec [2 ] is True :
530
527
is_one_to_one_relation = True
531
528
532
- # Set relation parent: if no "/" in path, main_table is the parent
533
- if not "/" in table_path :
534
- parent_table_name = self .main_table .name
535
- else :
536
- table_path_fragments = table_path .split ("/" )
537
- parent_table_name = _table_name_of_path (
538
- "/" .join (table_path_fragments [:- 1 ])
539
- )
540
- self .relations .append (
541
- (parent_table_name , table_name , is_one_to_one_relation )
529
+ self .additional_data_tables .append (
530
+ (table_path , table , is_one_to_one_relation )
542
531
)
543
532
# Initialize a sparse dataset (monotable)
544
533
elif isinstance (main_table_source , sp .spmatrix ):
@@ -548,7 +537,6 @@ def _init_tables_from_mapping(self, X):
548
537
key = main_table_key ,
549
538
)
550
539
self .additional_data_tables = []
551
- self .relations = []
552
540
# Initialize a numpyarray dataset (monotable)
553
541
elif hasattr (main_table_source , "__array__" ):
554
542
self .main_table = NumpyTable (
@@ -561,7 +549,6 @@ def _init_tables_from_mapping(self, X):
561
549
"with pandas dataframe source tables"
562
550
)
563
551
self .additional_data_tables = []
564
- self .relations = []
565
552
else :
566
553
raise TypeError (
567
554
type_error_message (
@@ -680,11 +667,12 @@ def to_spec(self):
680
667
ds_spec = {}
681
668
ds_spec ["main_table" ] = (self .main_table .data_source , self .main_table .key )
682
669
ds_spec ["additional_data_tables" ] = {}
683
- for table in self .additional_data_tables :
684
- assert table . data_path is not None
685
- ds_spec ["additional_data_tables" ][table . data_path ] = (
670
+ for table_path , table , is_one_to_one_relation in self .additional_data_tables :
671
+ assert table_path is not None
672
+ ds_spec ["additional_data_tables" ][table_path ] = (
686
673
table .data_source ,
687
674
table .key ,
675
+ is_one_to_one_relation ,
688
676
)
689
677
690
678
return ds_spec
@@ -748,31 +736,32 @@ def create_khiops_dictionary_domain(self):
748
736
# Note: In general 'name' and 'object_type' fields of Variable can be different
749
737
if self .additional_data_tables :
750
738
main_dictionary .root = True
751
- table_names = [table .name for table in self .additional_data_tables ]
752
- tables_to_visit = [self .main_table .name ]
753
- while tables_to_visit :
754
- current_table = tables_to_visit .pop (0 )
755
- for relation in self .relations :
756
- parent_table , child_table , is_one_to_one_relation = relation
757
- if parent_table == current_table :
758
- tables_to_visit .append (child_table )
759
- parent_table_name = parent_table
760
- index_table = table_names .index (child_table )
761
- table = self .additional_data_tables [index_table ]
762
- parent_table_dictionary = dictionary_domain .get_dictionary (
763
- parent_table_name
764
- )
765
- dictionary = table .create_khiops_dictionary ()
766
- dictionary_domain .add_dictionary (dictionary )
767
- table_variable = kh .Variable ()
768
- if is_one_to_one_relation :
769
- table_variable .type = "Entity"
770
- else :
771
- table_variable .type = "Table"
772
- table_variable .name = table .name
773
- table_variable .object_type = table .name
774
- parent_table_dictionary .add_variable (table_variable )
739
+ for (
740
+ table_path ,
741
+ table ,
742
+ is_one_to_one_relation ,
743
+ ) in self .additional_data_tables :
744
+ if not "/" in table_path :
745
+ parent_table_name = self .main_table .name
746
+ else :
747
+ table_path_fragments = table_path .split ("/" )
748
+ parent_table_name = table_name_of_path (
749
+ "/" .join (table_path_fragments [:- 1 ])
750
+ )
751
+ parent_table_dictionary = dictionary_domain .get_dictionary (
752
+ parent_table_name
753
+ )
775
754
755
+ dictionary = table .create_khiops_dictionary ()
756
+ dictionary_domain .add_dictionary (dictionary )
757
+ table_variable = kh .Variable ()
758
+ if is_one_to_one_relation :
759
+ table_variable .type = "Entity"
760
+ else :
761
+ table_variable .type = "Table"
762
+ table_variable .name = table .name
763
+ table_variable .object_type = table .name
764
+ parent_table_dictionary .add_variable (table_variable )
776
765
return dictionary_domain
777
766
778
767
def create_table_files_for_khiops (self , output_dir , sort = True ):
@@ -811,9 +800,9 @@ def create_table_files_for_khiops(self, output_dir, sort=True):
811
800
812
801
# Create a copy of each secondary table
813
802
secondary_table_paths = {}
814
- for table in self .additional_data_tables :
815
- assert table . data_path is not None
816
- secondary_table_paths [table . data_path ] = table .create_table_file_for_khiops (
803
+ for table_path , table , _ in self .additional_data_tables :
804
+ assert table_path is not None
805
+ secondary_table_paths [table_path ] = table .create_table_file_for_khiops (
817
806
output_dir , sort = sort
818
807
)
819
808
@@ -918,13 +907,11 @@ class PandasTable(DatasetTable):
918
907
Name for the table.
919
908
dataframe : `pandas.DataFrame`
920
909
The data frame to be encapsulated. It must be non-empty.
921
- data_path : str, optional
922
- Data path of the table. Unset for main tables.
923
910
key : list of str, optional
924
911
The names of the columns composing the key.
925
912
"""
926
913
927
- def __init__ (self , name , dataframe , data_path = None , key = None ):
914
+ def __init__ (self , name , dataframe , key = None ):
928
915
# Call the parent method
929
916
super ().__init__ (name = name , key = key )
930
917
@@ -937,7 +924,6 @@ def __init__(self, name, dataframe, data_path=None, key=None):
937
924
# Initialize the attributes
938
925
self .data_source = dataframe
939
926
self .n_samples = len (self .data_source )
940
- self .data_path = data_path
941
927
942
928
# Initialize feature columns and verify their types
943
929
self .column_ids = self .data_source .columns .values
0 commit comments