21
21
import khiops .core as kh
22
22
import khiops .core .internals .filesystems as fs
23
23
from khiops .core .dictionary import VariableBlock
24
- from khiops .core .internals .common import is_dict_like , is_list_like , type_error_message
24
+ from khiops .core .internals .common import (
25
+ deprecation_message ,
26
+ is_dict_like ,
27
+ is_list_like ,
28
+ type_error_message ,
29
+ )
25
30
26
31
# Disable PEP8 variable names because of scikit-learn X,y conventions
27
32
# To capture invalid-names other than X,y run:
@@ -171,6 +176,54 @@ def _check_multitable_spec(ds_spec):
171
176
)
172
177
173
178
179
+ def _table_name_of_path (table_path ):
180
+ return table_path .split ("/" )[- 1 ]
181
+
182
+
183
def _upgrade_mapping_spec(ds_spec):
    """Converts a deprecated multi-table spec to the data-path-based format

    Parameters
    ----------
    ds_spec : dict-like
        Dataset spec in the deprecated format, with the keys:

        - ``"main_table"``: name of the main table.
        - ``"tables"``: mapping from table name to a ``(source, key)`` pair.
        - ``"relations"`` (optional): iterable of ``(left, right)`` or
          ``(left, right, is_entity)`` tuples. When absent, a star schema is
          assumed and each secondary table's data-path is its own name.

    Returns
    -------
    dict
        A new spec with the keys:

        - ``"main_table"``: the ``(source, key)`` pair of the main table.
        - ``"additional_data_tables"``: mapping from data-path to
          ``(source, key)``, or ``(source, key, is_entity)`` when the entity
          flag of the relation pointing to the table is truthy.

        Keys are always normalized to lists. Data-paths are relative to the
        main table, whose name is not part of the path.
    """
    assert is_dict_like(ds_spec)

    # Hoist the loop invariants
    main_table_name = ds_spec["main_table"]
    relations = list(ds_spec["relations"]) if "relations" in ds_spec else []

    new_ds_spec = {"additional_data_tables": {}}
    for table_name, table_data in ds_spec["tables"].items():
        table_df, table_key = table_data
        if not is_list_like(table_key):
            table_key = [table_key]

        if table_name == main_table_name:
            new_ds_spec["main_table"] = (table_df, table_key)
            continue

        # Build the data-path by prepending ancestors found in the relations.
        # Example, for:
        # - current table name N
        # - main table name N1
        # - and relations: (N1, N2), (N2, N3), (N3, N)
        # the data-path must be N2/N3/N
        #
        # The relations are scanned repeatedly until a full pass adds no new
        # ancestor, so the path is complete whatever the depth of the schema
        # or the order of the relations. This always terminates because every
        # growth step adds a table name not yet present in the path.
        table_path = [table_name]
        is_entity = False
        path_grew = True
        while path_grew:
            path_grew = False
            for relation in relations:
                left, right = relation[:2]
                # The optional third field flags the relation pointing to the
                # current table as an entity relation
                if len(relation) == 3 and right == table_name:
                    is_entity = relation[2]
                if (
                    left != main_table_name
                    and left not in table_path
                    and right in table_path
                ):
                    table_path.insert(0, left)
                    path_grew = True

        if is_entity:
            upgraded_table_data = (table_df, table_key, is_entity)
        else:
            upgraded_table_data = (table_df, table_key)
        new_ds_spec["additional_data_tables"]["/".join(table_path)] = (
            upgraded_table_data
        )
    return new_ds_spec
225
+
226
+
174
227
def get_khiops_type (numpy_type ):
175
228
"""Translates a numpy dtype to a Khiops dictionary type
176
229
@@ -426,14 +479,26 @@ def _check_input_sequence(self, X, key=None):
426
479
# Check the key for the main_table (it is the same for the others)
427
480
_check_table_key ("main_table" , key )
428
481
429
- def _table_name_of_path (self , table_path ):
430
- # TODO: Add >= 128-character truncation and indexing scheme
431
- return table_path .split ("/" )[- 1 ]
432
-
433
482
def _init_tables_from_mapping (self , X ):
434
483
"""Initializes the table spec from a dict-like 'X'"""
435
484
assert is_dict_like (X ), "'X' must be dict-like"
436
485
486
+ # Detect if deprecated mapping specification syntax is used;
487
+ # if so, issue deprecation warning and transform it to the new syntax
488
+ if "tables" in X .keys () and isinstance (X .get ("main_table" ), str ):
489
+ warnings .warn (
490
+ deprecation_message (
491
+ "This multi-table dataset specification format" ,
492
+ "11.0.1" ,
493
+ replacement = (
494
+ "the new data-path-based format, as documented in "
495
+ ":doc:`multi_table_primer`."
496
+ ),
497
+ quote = False ,
498
+ )
499
+ )
500
+ X = _upgrade_mapping_spec (X )
501
+
437
502
# Check the input mapping
438
503
check_dataset_spec (X )
439
504
@@ -452,7 +517,7 @@ def _init_tables_from_mapping(self, X):
452
517
if "additional_data_tables" in X :
453
518
for table_path , table_spec in X ["additional_data_tables" ].items ():
454
519
table_source , table_key = table_spec [:2 ]
455
- table_name = self . _table_name_of_path (table_path )
520
+ table_name = _table_name_of_path (table_path )
456
521
table = PandasTable (
457
522
table_name ,
458
523
table_source ,
@@ -469,7 +534,7 @@ def _init_tables_from_mapping(self, X):
469
534
parent_table_name = self .main_table .name
470
535
else :
471
536
table_path_fragments = table_path .split ("/" )
472
- parent_table_name = self . _table_name_of_path (
537
+ parent_table_name = _table_name_of_path (
473
538
"/" .join (table_path_fragments [:- 1 ])
474
539
)
475
540
self .relations .append (
0 commit comments