Skip to content

Commit be63176

Browse files
committed
UPLOAD: initial commit
1 parent 396bbea commit be63176

20 files changed

+576
-0
lines changed

.gitattributes

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
*.7z filter=lfs diff=lfs merge=lfs -text
2+
*.arrow filter=lfs diff=lfs merge=lfs -text
3+
*.bin filter=lfs diff=lfs merge=lfs -text
4+
*.bz2 filter=lfs diff=lfs merge=lfs -text
5+
*.ckpt filter=lfs diff=lfs merge=lfs -text
6+
*.ftz filter=lfs diff=lfs merge=lfs -text
7+
*.gz filter=lfs diff=lfs merge=lfs -text
8+
*.h5 filter=lfs diff=lfs merge=lfs -text
9+
*.joblib filter=lfs diff=lfs merge=lfs -text
10+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
11+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
12+
*.model filter=lfs diff=lfs merge=lfs -text
13+
*.msgpack filter=lfs diff=lfs merge=lfs -text
14+
*.npy filter=lfs diff=lfs merge=lfs -text
15+
*.npz filter=lfs diff=lfs merge=lfs -text
16+
*.onnx filter=lfs diff=lfs merge=lfs -text
17+
*.ot filter=lfs diff=lfs merge=lfs -text
18+
*.parquet filter=lfs diff=lfs merge=lfs -text
19+
*.pb filter=lfs diff=lfs merge=lfs -text
20+
*.pickle filter=lfs diff=lfs merge=lfs -text
21+
*.pkl filter=lfs diff=lfs merge=lfs -text
22+
*.pt filter=lfs diff=lfs merge=lfs -text
23+
*.pth filter=lfs diff=lfs merge=lfs -text
24+
*.rar filter=lfs diff=lfs merge=lfs -text
25+
*.safetensors filter=lfs diff=lfs merge=lfs -text
26+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27+
*.tar.* filter=lfs diff=lfs merge=lfs -text
28+
*.tar filter=lfs diff=lfs merge=lfs -text
29+
*.tflite filter=lfs diff=lfs merge=lfs -text
30+
*.tgz filter=lfs diff=lfs merge=lfs -text
31+
*.wasm filter=lfs diff=lfs merge=lfs -text
32+
*.xz filter=lfs diff=lfs merge=lfs -text
33+
*.zip filter=lfs diff=lfs merge=lfs -text
34+
*.zst filter=lfs diff=lfs merge=lfs -text
35+
*tfevents* filter=lfs diff=lfs merge=lfs -text

.github/workflows/ci-cd.yaml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Workflow with a single "docker" job; triggered by any push event.
name: CI/CD

on:
  push:

jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Credentials are read from the repository secrets.
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          push: true
          tags: siddiquirauhan/autodataanalyzer:latest

Dockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Container image for the application: installs the Python dependencies and
# starts app.py, exposing port 7860.
FROM python:3.10-slim

COPY . /app

WORKDIR /app

USER root

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# NOTE(review): world-writable permissions on the whole app directory --
# presumably so a non-root runtime user can write generated files; confirm
# and tighten if possible.
RUN chmod -R 777 /app

EXPOSE 7860

CMD ["python", "app.py"]

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
title: DataAnalyser
3+
emoji: 🏃
4+
colorFrom: gray
5+
colorTo: purple
6+
sdk: docker
7+
pinned: false
8+
license: mit
9+
---
10+
11+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from src.pipelines.pipeline import CompletePipeline
2+
from pywebio.platform.flask import start_server
3+
from src.utils.functions import getConfig
4+
from pywebio.output import *
5+
from pywebio.input import *
6+
7+
def main():
    """
    Run the interactive session: collect the dataset, then answer questions with charts.

    The user uploads one or more CSV files, a metadata JSON file and a domain
    description, which are passed to ``CompletePipeline.loadData``. Every
    following question is sent to ``pipeline.generateGraph``; the returned code
    is executed with ``pipeline.pythonRepl.run`` and an empty message from the
    REPL is treated as success. Each question is attempted up to five times.
    The loop ends when the user enters "exit".
    """
    max_attempts = 5
    pipeline = CompletePipeline()

    inputData = input_group(
        "Data Upload",
        inputs=[
            file_upload(name="files", label="Upload Files", accept=".csv", multiple=True, placeholder="Drop your CSV files here"),
            file_upload(name="metadata", label="Upload Metadata", accept=".json", multiple=False, placeholder="Drop your metadata.json here"),
            input(name="domain", label="Enter the Domain of your dataset"),
        ],
    )

    pipeline.loadData(
        inputData=inputData["files"],
        metadata=inputData["metadata"]["content"],
        domainContext=inputData["domain"],
    )

    while True:
        question = input(label="Enter your question")
        if question == "exit":
            break

        succeeded = False
        filename = None
        # Pre-bound so the failure report below never hits an unbound name,
        # even when every generateGraph call raises.
        message = ""

        with put_loading().style("position: absolute; left: 50%"):
            for _ in range(max_attempts):
                try:
                    filename, code = pipeline.generateGraph(query=question)
                except Exception as error:
                    # Keep the reason so it can be shown if all attempts fail;
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    message = str(error)
                    continue

                message = pipeline.pythonRepl.run(code)
                if message == "":
                    succeeded = True
                    break

        if succeeded:
            # Read through a context manager so the file handle is closed.
            with open(filename, "r") as chartFile:
                response = put_html(chartFile.read())
        else:
            response = put_text(f"Encountered error in {max_attempts} tries, says: {message}")

        put_table([
            ["Query: ", question],
            ["Response: ", response],
        ])
54+
55+
if __name__ == "__main__":
    # Server port and host are taken from the [APPLICATION] section of config.ini.
    appConfig = getConfig("config.ini")
    serverPort = appConfig.getint("APPLICATION", "port")
    serverHost = appConfig.get("APPLICATION", "host")
    start_server(main, port=serverPort, host=serverHost)

config.ini

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[LLM]
2+
model = llama-3.1-70b-versatile
3+
temperature = 1
4+
5+
[APPLICATION]
6+
host = 0.0.0.0
7+
port = 7860

params.yaml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
prompt: |
2+
<system>
3+
# Expert Data Visualization System
4+
5+
You are an expert data/visualization analyst. Generate precise Plotly visualizations from provided metadata and queries.
6+
7+
## Critical Data Handling Rules:
8+
All DataFrames are already defined and loaded so that they are ready to be used.
9+
10+
MANDATORY FIRST STEP:
11+
- ALWAYS create copies of original DataFrames before any operations
12+
- Perform ALL operations on copied DataFrames only
13+
- NEVER modify original DataFrames
14+
15+
STRICTLY FORBIDDEN:
16+
- DO NOT redefine or override existing DataFrame names
17+
- DO NOT read/load data
18+
- DO NOT create sample data
19+
- DO NOT modify original DataFrames directly
20+
- DO NOT perform operations on original DataFrames
21+
22+
REQUIRED:
23+
- Create copies first, for example: new_df = df.copy()
24+
- Handle data type conversions on copied DataFrames
25+
- Use appropriate data type casting when needed
26+
- All transformations must be on copied DataFrames
27+
- Create new variables for additional transformations
28+
29+
Sample rows in metadata are for reference only to understand data structure.
30+
31+
## Inputs:
32+
- Metadata: {metadata} # Database schema/structure
33+
- Query: {user_query} # User's visualization request
34+
- Context: {domain_context} # Business context
35+
36+
## Core Requirements:
37+
1. Analysis:
38+
- Extract key metrics/relationships
39+
- Select optimal visualization type
40+
- Consider data characteristics
41+
- Validate data types for operations
42+
43+
2. Visualization Code:
44+
- Single, complete Plotly chart
45+
- Clean, production code
46+
- End with fig.show()
47+
- No room for errors
48+
- ALWAYS create DataFrame copies first
49+
- Perform operations only on copied DataFrames
50+
- Handle type conversions safely
51+
52+
3. Quality Standards:
53+
- Data accuracy
54+
- Interactive features
55+
- Performance optimization
56+
- Never empty results
57+
- Data type compatibility
58+
- Original data preservation
59+
60+
## Response Format:
61+
<code>Complete Python/Plotly implementation using copied DataFrames</code>
62+
</system>

requirements.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
flask
2+
numpy
3+
pandas
4+
pyyaml
5+
plotly
6+
pywebio
7+
langchain
8+
python-dotenv
9+
langchain-groq
10+
langchain-core
11+
langchain-community
12+
langchain-experimental
13+
-e .

setup.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from setuptools import setup, find_packages
2+
3+
# Editable-install marker used in requirements.txt; it is not a package name
# and must not be passed to install_requires.
HYPHEN_E_DOT = "-e ."


def get_requirements(requirements_path: str) -> list[str]:
    """
    Reads the requirements file and returns a list of packages.

    Surrounding whitespace is stripped from every line. Blank lines, comment
    lines (starting with "#") and the editable-install marker "-e ." are
    skipped, so an empty file yields an empty list.

    Args:
        requirements_path (str): Path to the requirements file.

    Returns:
        list[str]: List of packages required for the project.
    """
    with open(requirements_path, "r") as file:
        entries = [line.strip() for line in file]
    return [
        entry
        for entry in entries
        if entry and not entry.startswith("#") and entry != HYPHEN_E_DOT
    ]
20+
21+
# Package metadata; install_requires is read from requirements.txt.
setup(
    name="AutoDataAnalyzer",
    version="0.1",
    author="Rauhan Ahmed Siddiqui",
    author_email="rauhaan.siddiqui@gmail.com",
    packages=find_packages(),
    install_requires=get_requirements(requirements_path="requirements.txt"),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)

src/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)