RauhanAhmed
diff --git a/‎.gitattributes
Lines changed: 35 additions & 0 deletions b/‎.gitattributes
Lines changed: 35 additions & 0 deletions
diff --git a/‎.github/workflows/ci-cd.yaml
Lines changed: 27 additions & 0 deletions b/‎.github/workflows/ci-cd.yaml
Lines changed: 27 additions & 0 deletions
diff --git a/‎Dockerfile
Lines changed: 15 additions & 0 deletions b/‎Dockerfile
Lines changed: 15 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 11 additions & 0 deletions b/‎README.md
Lines changed: 11 additions & 0 deletions
diff --git a/‎app.py
Lines changed: 57 additions & 0 deletions b/‎app.py
Lines changed: 57 additions & 0 deletions
diff --git a/‎config.ini
Lines changed: 7 additions & 0 deletions b/‎config.ini
Lines changed: 7 additions & 0 deletions
diff --git a/‎params.yaml
Lines changed: 62 additions & 0 deletions b/‎params.yaml
Lines changed: 62 additions & 0 deletions
diff --git a/‎requirements.txt
Lines changed: 13 additions & 0 deletions b/‎requirements.txt
Lines changed: 13 additions & 0 deletions
diff --git a/‎setup.py
Lines changed: 33 additions & 0 deletions b/‎setup.py
Lines changed: 33 additions & 0 deletions
diff --git a/‎src/__init__.py b/‎src/__init__.py
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
@@ -0,0 +1,27 @@
+# Workflow: build the project's Docker image and push it to Docker Hub.
+name: CI/CD
+
+# No branch or path filter is set, so a push to any branch triggers the job.
+on:
+  push:
+
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    # NOTE(review): there is no checkout step; presumably the build relies on
+    # docker/build-push-action's default Git context — confirm.
+    steps:
+      -
+        name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      -
+        # Credentials are read from the repository secrets DOCKER_USERNAME and
+        # DOCKER_TOKEN.
+        name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_TOKEN }}
+      -
+        # Every run pushes to the same `latest` tag, overwriting the previous image.
+        name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          push: true
+          tags: siddiquirauhan/autodataanalyzer:latest
@@ -0,0 +1,15 @@
+# Slim CPython 3.10 base image.
+FROM python:3.10-slim
+
+# Copy the whole project before installing: requirements.txt ends with "-e .",
+# so setup.py and the package sources must already be present.
+COPY . /app
+
+WORKDIR /app
+
+USER root
+
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir -r requirements.txt
+
+# NOTE(review): makes the app directory world-writable; presumably required
+# because the hosting platform runs the container as a non-root user — confirm
+# and narrow the permissions if possible.
+RUN chmod -R 777 /app
+
+# Same port as the `port` value in config.ini.
+EXPOSE 7860
+
+CMD ["python", "app.py"]
@@ -0,0 +1,11 @@
+---
+title: DataAnalyser
+emoji: 🏃
+colorFrom: gray
+colorTo: purple
+sdk: docker
+pinned: false
+license: mit
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
@@ -0,0 +1,57 @@
+from src.pipelines.pipeline import CompletePipeline
+from pywebio.platform.flask import start_server
+from src.utils.functions import getConfig
+from pywebio.output import *
+from pywebio.input import *
+
+def main():
+    """
+    Main function to run the application, handle user inputs, and interact with the pipeline.
+    
+    This function facilitates file uploads, processes user queries, and displays responses
+    using the CompletePipeline. The application continues to run until the user inputs "exit".
+    """
+    pipeline = CompletePipeline()
+    
+    inputData = input_group("Data Upload", 
+                            inputs=[
+                                file_upload(name="files", label="Upload Files", accept=".csv", multiple=True, placeholder="Drop your CSV files here"),
+                                file_upload(name="metadata", label="Upload Metadata", accept=".json", multiple=False, placeholder="Drop your metadata.json here"),
+                                input(name="domain", label="Enter the Domain of your dataset")
+                            ])
+    
+    pipeline.loadData(inputData=inputData["files"], metadata=inputData["metadata"]["content"], domainContext=inputData["domain"])
+    
+    while True:
+        question = input(label="Enter your question")
+        if question == "exit":
+            break
+        else:
+            with put_loading().style("position: absolute; left: 50%"):
+                flag = 0
+                for i in range(5):
+                    try:
+                        filename, code = pipeline.generateGraph(query=question)
+                    except: 
+                        continue
+                    message = pipeline.pythonRepl.run(code)
+                    if message == "":
+                        flag = 1
+                        break
+                    else:
+                        pass
+
+            if flag == 0:
+                put_table([
+                    ["Query: ", question],
+                    ["Response: ", put_text(f"Encountered error in 5 tries, says: {message}")]
+                ])
+            else:
+                put_table([
+                    ["Query: ", question],
+                    ["Response: ", put_html(open(filename, "r").read())]
+                ])
+
+if __name__ == "__main__":
+    config = getConfig("config.ini")
+    start_server(main, port=config.getint("APPLICATION", "port"), host=config.get("APPLICATION", "host"))
@@ -0,0 +1,7 @@
+[LLM]
+model = llama-3.1-70b-versatile
+temperature = 1
+
+[APPLICATION]
+host = 0.0.0.0
+port = 7860
@@ -0,0 +1,62 @@
+prompt: |
+  <system>
+  # Expert Data Visualization System
+
+  You are an expert data/visualization analyst. Generate precise Plotly visualizations from provided metadata and queries.
+
+  ## Critical Data Handling Rules:
+  All DataFrames are already defined and loaded so that they are ready to be used. 
+
+  MANDATORY FIRST STEP:
+  - ALWAYS create copies of original DataFrames before any operations
+  - Perform ALL operations on copied DataFrames only
+  - NEVER modify original DataFrames
+
+  STRICTLY FORBIDDEN:
+  - DO NOT redefine or override existing DataFrame names
+  - DO NOT read/load data
+  - DO NOT create sample data
+  - DO NOT modify original DataFrames directly
+  - DO NOT perform operations on original DataFrames
+
+  REQUIRED:
+  - Create copies first, for example: new_df = df.copy()
+  - Handle data type conversions on copied DataFrames
+  - Use appropriate data type casting when needed
+  - All transformations must be on copied DataFrames
+  - Create new variables for additional transformations
+
+  Sample rows in metadata are for reference only to understand data structure.
+
+  ## Inputs:
+  - Metadata: {metadata}  # Database schema/structure
+  - Query: {user_query}   # User's visualization request
+  - Context: {domain_context}  # Business context
+
+  ## Core Requirements:
+  1. Analysis:
+    - Extract key metrics/relationships
+    - Select optimal visualization type
+    - Consider data characteristics
+    - Validate data types for operations
+
+  2. Visualization Code:
+    - Single, complete Plotly chart
+    - Clean, production code
+    - End with fig.show()
+    - No room for errors
+    - ALWAYS create DataFrame copies first
+    - Perform operations only on copied DataFrames
+    - Handle type conversions safely
+
+  3. Quality Standards:
+    - Data accuracy
+    - Interactive features
+    - Performance optimization
+    - Never empty results
+    - Data type compatibility
+    - Original data preservation
+
+  ## Response Format:
+  <code>Complete Python/Plotly implementation using copied DataFrames</code>
+  </system>
@@ -0,0 +1,13 @@
+flask
+numpy
+pandas
+pyyaml
+plotly
+pywebio
+langchain 
+python-dotenv
+langchain-groq
+langchain-core 
+langchain-community 
+langchain-experimental
+-e .
@@ -0,0 +1,33 @@
+from setuptools import setup, find_packages
+
+HYPHEN_E_DOT = "-e ."
+
+def get_requirements(requirements_path: str) -> list[str]:
+    """
+    Reads the requirements file and returns a list of packages.
+
+    Args:
+        requirements_path (str): Path to the requirements file.
+
+    Returns:
+        list[str]: List of packages required for the project.
+    """
+    with open(requirements_path, "r") as file:
+        requirements = file.read().strip().split("\n")
+    if HYPHEN_E_DOT in requirements:
+        requirements.remove(HYPHEN_E_DOT)
+    return requirements
+
+setup(
+    name="AutoDataAnalyzer",
+    author="Rauhan Ahmed Siddiqui",
+    author_email="rauhaan.siddiqui@gmail.com",
+    version="0.1",
+    packages=find_packages(),
+    install_requires=get_requirements(requirements_path="requirements.txt"),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+)