fix: update conversion script for onnx v1.18.0 and Python 3.13.2 compatibility #1368

Open · wants to merge 1 commit into main

30 changes: 15 additions & 15 deletions scripts/convert.py
@@ -1,4 +1,3 @@
-
import json
import os
import shutil
@@ -14,10 +13,11 @@

import onnxslim
from optimum.exporters.onnx import main_export, export_models
- from optimum.onnx.graph_transformations import check_and_save_model
from optimum.exporters.tasks import TasksManager

- from .quantize import QuantizationArguments, quantize
+ from optimum.onnxruntime import ORTQuantizer
+ from optimum.onnxruntime.configuration import QuantizationConfig
+ from quantize import QuantizationArguments, quantize
+ from utils import check_and_save_model

NO_PER_CHANNEL_REDUCE_RANGE_MODELS = {
# Decoder-only models
@@ -260,37 +260,37 @@ def main():

# Handle special cases
if config.model_type == 'marian':
- from .extra.marian import generate_tokenizer_json
+ from extra.marian import generate_tokenizer_json
tokenizer_json = generate_tokenizer_json(model_id, tokenizer)

with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
json.dump(tokenizer_json, fp, indent=4)

elif config.model_type == 'esm':
- from .extra.esm import generate_fast_tokenizer
+ from extra.esm import generate_fast_tokenizer
fast_tokenizer = generate_fast_tokenizer(tokenizer)
fast_tokenizer.save(os.path.join(
output_model_folder, 'tokenizer.json'))

elif config.model_type == 'whisper':
if conv_args.output_attentions:
- from .extra.whisper import get_main_export_kwargs
+ from extra.whisper import get_main_export_kwargs

export_kwargs.update(
**get_main_export_kwargs(config, "automatic-speech-recognition")
)

elif config.model_type in ('wav2vec2', 'wav2vec2-bert', 'hubert', 'unispeech', 'unispeech-sat'):
if tokenizer is not None:
- from .extra.wav2vec2 import generate_tokenizer_json
+ from extra.wav2vec2 import generate_tokenizer_json
tokenizer_json = generate_tokenizer_json(tokenizer)

with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
json.dump(tokenizer_json, fp, indent=4)

elif config.model_type == 'vits':
if tokenizer is not None:
- from .extra.vits import generate_tokenizer_json
+ from extra.vits import generate_tokenizer_json
tokenizer_json = generate_tokenizer_json(tokenizer)

with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
@@ -302,7 +302,7 @@ def main():
"vocoder": "microsoft/speecht5_hifigan"}

if tokenizer is not None:
- from .extra.speecht5 import generate_tokenizer_json
+ from extra.speecht5 import generate_tokenizer_json
tokenizer_json = generate_tokenizer_json(tokenizer)

with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
@@ -314,7 +314,7 @@ def main():
export_kwargs['batch_size'] = 1

elif config.model_type == 'openelm':
- from .extra.openelm import OpenElmOnnxConfig
+ from extra.openelm import OpenElmOnnxConfig

config = AutoConfig.from_pretrained(
model_id, trust_remote_code=conv_args.trust_remote_code)
@@ -347,7 +347,7 @@ def main():

if config.model_type == 'clip':
# Handle special case for exporting text and vision models separately
- from .extra.clip import CLIPTextModelWithProjectionOnnxConfig, CLIPVisionModelWithProjectionOnnxConfig
+ from extra.clip import CLIPTextModelWithProjectionOnnxConfig, CLIPVisionModelWithProjectionOnnxConfig
from transformers.models.clip import CLIPTextModelWithProjection, CLIPVisionModelWithProjection

text_model = CLIPTextModelWithProjection.from_pretrained(
@@ -365,7 +365,7 @@ def main():

elif config.model_type == 'siglip':
# Handle special case for exporting text and vision models separately
- from .extra.siglip import SiglipTextModelOnnxConfig, SiglipVisionModelOnnxConfig
+ from extra.siglip import SiglipTextModelOnnxConfig, SiglipVisionModelOnnxConfig
from transformers.models.siglip import SiglipTextModel, SiglipVisionModel

text_model = SiglipTextModel.from_pretrained(
@@ -384,7 +384,7 @@ def main():
# TODO: Enable once https://github.com/huggingface/optimum/pull/1552 is merged
# elif config.model_type == 'clap':
# # Handle special case for exporting text and audio models separately
- # from .extra.clap import ClapTextModelWithProjectionOnnxConfig, ClapAudioModelWithProjectionOnnxConfig
+ # from extra.clap import ClapTextModelWithProjectionOnnxConfig, ClapAudioModelWithProjectionOnnxConfig
# from transformers.models.clap import ClapTextModelWithProjection, ClapAudioModelWithProjection

# text_model = ClapTextModelWithProjection.from_pretrained(model_id, **from_pretrained_kwargs)
@@ -442,7 +442,7 @@ def main():
# Step 4. Update the generation config if necessary
if config.model_type == 'whisper':
from transformers import GenerationConfig
- from .extra.whisper import get_alignment_heads
+ from extra.whisper import get_alignment_heads

generation_config = GenerationConfig.from_pretrained(
model_id, **from_pretrained_kwargs)
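Note on the convert.py changes: the hunks above replace the package-relative imports (from .quantize, from .extra.*) with plain absolute imports, and swap optimum.onnx.graph_transformations.check_and_save_model for a local utils.check_and_save_model whose implementation is not shown in this diff. A minimal sketch of what such a helper might look like, assuming it mirrors the behavior of the removed optimum function (validate the graph, then save, switching to external data above the 2 GB protobuf limit); the body below is an illustrative assumption, not the PR's actual utils.py:

    import os

    import onnx
    from onnx import checker

    def check_and_save_model(model: onnx.ModelProto, save_path: str) -> None:
        # Hypothetical stand-in for scripts/utils.check_and_save_model;
        # the real implementation is not part of this diff.
        if model.ByteSize() < checker.MAXIMUM_PROTOBUF:
            # Small enough to validate in memory and save as a single file.
            checker.check_model(model)
            onnx.save(model, save_path)
        else:
            # Over the 2 GB protobuf limit: store weights as external data,
            # then validate from disk (check_model also accepts a path).
            onnx.save(
                model,
                save_path,
                save_as_external_data=True,
                all_tensors_to_one_file=True,
                location=os.path.basename(save_path) + "_data",
            )
            checker.check_model(save_path)
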
4 changes: 2 additions & 2 deletions scripts/quantize.py
@@ -15,8 +15,8 @@
from onnxruntime.quantization.matmul_4bits_quantizer import MatMul4BitsQuantizer
from onnxruntime.quantization.matmul_bnb4_quantizer import MatMulBnb4Quantizer

- from . import float16
- from .utils import check_and_save_model
+ import float16
+ from utils import check_and_save_model

class QuantMode(Enum):
# F32 = 'fp32'
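quantize.py gets the same treatment: the relative imports of float16 and utils become absolute, which suggests both scripts are meant to be run with scripts/ as the working directory (or with scripts/ on sys.path) rather than as modules of a package. A small illustration of that assumption from the repository root (hypothetical usage, not part of the PR):

    import sys

    # Make scripts/ importable so the absolute imports used by convert.py and
    # quantize.py (e.g. "import float16", "from utils import ...") resolve.
    sys.path.insert(0, "scripts")

    from quantize import QuantizationArguments  # resolves to scripts/quantize.py
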
2 changes: 1 addition & 1 deletion scripts/requirements.txt
@@ -1,7 +1,7 @@
transformers[torch]==4.49.0
onnxruntime==1.20.1
optimum@git+https://github.com/huggingface/optimum.git@b04feaea78cda58d79b8da67dca3fd0c4ab33435
- onnx==1.17.0
+ onnx==1.18.0
tqdm==4.67.1
onnxslim==0.1.48
numpy==2.2.6
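
The only dependency change is the onnx pin, bumped from 1.17.0 to 1.18.0 in line with the onnx v1.18.0 / Python 3.13.2 target named in the PR title. A quick sanity check after installing the updated requirements (expected versions are the ones pinned above; this snippet is illustrative, not part of the PR):

    import sys

    import onnx
    import onnxruntime

    # Expected for this PR: Python 3.13.2, onnx 1.18.0, onnxruntime 1.20.1.
    print(sys.version)
    print(onnx.__version__)
    print(onnxruntime.__version__)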