Skip to content

Commit bb5536a

Browse files
committed
feat: add dataset api
1 parent 92b8d61 commit bb5536a

File tree

19 files changed

+2596
-2
lines changed

19 files changed

+2596
-2
lines changed

packages/sample-app/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
"run:pinecone": "npm run build && node dist/src/sample_pinecone.js",
2929
"run:langchain": "npm run build && node dist/src/sample_langchain.js",
3030
"run:sample_structured_output": "npm run build && node dist/src/sample_structured_output.js",
31+
"run:dataset": "npm run build && node dist/src/sample_dataset.js",
32+
"test:dataset": "npm run build && node dist/src/test_dataset_api.js",
3133
"lint": "eslint .",
3234
"lint:fix": "eslint . --fix"
3335
},
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
import * as traceloop from "@traceloop/node-server-sdk";
2+
import OpenAI from "openai";
3+
4+
/** Model requested for every sample completion and recorded in each row. */
const SAMPLE_MODEL = "gpt-3.5-turbo";

/**
 * Columns added to the sample dataset, in the order they are created.
 * `as const` keeps the `type` values as string literals instead of `string`.
 */
const DATASET_COLUMNS = [
  {
    name: "user_id",
    type: "string",
    required: true,
    description: "Unique identifier for the user",
  },
  {
    name: "prompt",
    type: "string",
    required: true,
    description: "The user's input prompt",
  },
  {
    name: "response",
    type: "string",
    required: true,
    description: "The AI model's response",
  },
  {
    name: "model",
    type: "string",
    required: true,
    description: "The AI model used (e.g., gpt-4)",
  },
  {
    name: "tokens_used",
    type: "number",
    required: false,
    description: "Total tokens consumed",
  },
  {
    name: "response_time_ms",
    type: "number",
    required: false,
    description: "Response time in milliseconds",
  },
  {
    name: "satisfaction_score",
    type: "number",
    required: false,
    description: "User satisfaction rating (1-5)",
  },
  {
    name: "timestamp",
    type: "string",
    required: true,
    description: "When the interaction occurred",
  },
] as const;

/** Header line and data lines of the CSV payload imported in step 4. */
const CSV_HEADER =
  "user_id,prompt,response,model,tokens_used,response_time_ms,satisfaction_score,timestamp";
const CSV_ROWS = [
  'user_006,"What is React?","React is a JavaScript library for building user interfaces...","gpt-3.5-turbo",85,1200,4,"2024-01-15T10:30:00Z"',
  'user_007,"Explain Docker","Docker is a containerization platform that allows you to package applications...","gpt-3.5-turbo",120,1500,5,"2024-01-15T10:35:00Z"',
  'user_008,"What is GraphQL?","GraphQL is a query language and runtime for APIs...","gpt-3.5-turbo",95,1100,4,"2024-01-15T10:40:00Z"',
];

/**
 * Returns a printable message for a caught value.
 * Caught values are not guaranteed to be `Error` instances, so `.message`
 * cannot be read unconditionally.
 */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : String(error);

/** Arithmetic mean of a non-empty list of numbers. */
const average = (values: number[]): number =>
  values.reduce((sum, value) => sum + value, 0) / values.length;

/**
 * Runs the dataset API walkthrough: creates a dataset, adds columns, records
 * one row per sample prompt (calling OpenAI for each), imports CSV rows,
 * prints statistics and a small analysis, lists versions, publishes, lists
 * datasets and looks the new dataset up by name.
 *
 * Failures inside the walkthrough are logged and reported through
 * `process.exitCode = 1`, so the process does not exit with status 0 after
 * an error. The returned promise still resolves in that case.
 */
const main = async () => {
  // Initialize Traceloop SDK
  traceloop.initialize({
    appName: "sample_dataset",
    apiKey: process.env.TRACELOOP_API_KEY,
    disableBatch: true,
    traceloopSyncEnabled: true,
  });

  await traceloop.waitForInitialization();

  const client = traceloop.getClient();
  if (!client) {
    console.error("Failed to initialize Traceloop client");
    // Signal failure to the caller instead of exiting with status 0.
    process.exitCode = 1;
    return;
  }

  console.log("🚀 Dataset API Sample Application");
  console.log("==================================\n");

  try {
    // 1. Create a new dataset for tracking LLM interactions
    console.log("📝 Creating a new dataset...");
    const dataset = await client.datasets.create({
      name: `llm-interactions-${Date.now()}`,
      description:
        "Dataset for tracking OpenAI chat completions and user interactions",
    });

    console.log(`✅ Dataset created: ${dataset.name} (ID: ${dataset.id})\n`);

    // 2. Define the schema by adding columns, one request at a time and in
    //    declaration order.
    console.log("🏗️ Adding columns to define schema...");

    for (const column of DATASET_COLUMNS) {
      await dataset.addColumn(column);
    }

    console.log(`✅ Schema defined with ${DATASET_COLUMNS.length} columns\n`);

    // 3. Simulate some LLM interactions and collect data
    console.log("🤖 Simulating LLM interactions...");

    const openai = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });

    const samplePrompts = [
      "Explain machine learning in simple terms",
      "Write a Python function to calculate fibonacci numbers",
      "What are the benefits of using TypeScript?",
      "How does async/await work in JavaScript?",
      "Explain the concept of closures in programming",
    ];

    /**
     * Sends one prompt to OpenAI and returns the row to store for it.
     * Only the OpenAI call is guarded: when it fails, a placeholder row
     * describing the error is returned instead. Dataset writes happen in the
     * caller so a write failure is never mistaken for a prompt failure.
     */
    const collectInteraction = async (
      prompt: string,
      userId: string,
      position: number,
    ) => {
      const startTime = Date.now();

      try {
        // Make actual OpenAI API call
        const completion = await openai.chat.completions.create({
          model: SAMPLE_MODEL,
          messages: [{ role: "user", content: prompt }],
          max_tokens: 150,
        });
        const responseTime = Date.now() - startTime;

        return {
          user_id: userId,
          prompt,
          // `||` on purpose: an empty completion is reported as "No response".
          response: completion.choices[0]?.message?.content || "No response",
          model: SAMPLE_MODEL,
          tokens_used: completion.usage?.total_tokens ?? 0,
          response_time_ms: responseTime,
          satisfaction_score: Math.floor(Math.random() * 5) + 1, // Random satisfaction 1-5
          timestamp: new Date().toISOString(),
        };
      } catch (error) {
        const message = describeError(error);
        console.log(` ⚠️ Error with prompt ${position}: ${message}`);

        // Record the failed attempt so the dataset still has a row for it.
        return {
          user_id: userId,
          prompt,
          response: `Error: ${message}`,
          model: SAMPLE_MODEL,
          tokens_used: 0,
          response_time_ms: Date.now() - startTime,
          satisfaction_score: 1,
          timestamp: new Date().toISOString(),
        };
      }
    };

    const interactions = [];

    for (const [index, prompt] of samplePrompts.entries()) {
      const position = index + 1;
      const userId = `user_${String(position).padStart(3, "0")}`;

      console.log(` Processing prompt ${position}/${samplePrompts.length}...`);

      const interaction = await collectInteraction(prompt, userId, position);

      // Add individual row to dataset. Count it only once the write succeeded;
      // a write failure propagates to the outer handler.
      await dataset.addRow(interaction);
      interactions.push(interaction);
    }

    console.log(`✅ Added ${interactions.length} interaction records\n`);

    // 4. Import additional data from CSV
    console.log("📊 Importing additional data from CSV...");

    const csvData = [CSV_HEADER, ...CSV_ROWS].join("\n");

    await dataset.fromCSV(csvData, { hasHeader: true });
    console.log(
      `✅ Imported ${CSV_ROWS.length} additional records from CSV\n`,
    );

    // 5. Get dataset statistics
    console.log("📈 Getting dataset statistics...");
    const stats = await dataset.getStats();
    console.log(` • Total rows: ${stats.rowCount}`);
    console.log(` • Total columns: ${stats.columnCount}`);
    console.log(` • Dataset size: ${stats.size} bytes`);
    console.log(` • Last modified: ${stats.lastModified}\n`);

    // 6. Retrieve and analyze some data
    console.log("🔍 Analyzing collected data...");
    const rows = await dataset.getRows(10); // Get first 10 rows

    if (rows.length > 0) {
      console.log(` • Retrieved ${rows.length} rows`);

      // Calculate average satisfaction score (rows without a value are skipped)
      const satisfactionScores = rows
        .map((row) => row.data.satisfaction_score as number)
        .filter((score) => score != null);

      if (satisfactionScores.length > 0) {
        const avgSatisfaction = average(satisfactionScores);
        console.log(
          ` • Average satisfaction score: ${avgSatisfaction.toFixed(2)}/5`,
        );
      }

      // Calculate average response time (rows without a value are skipped)
      const responseTimes = rows
        .map((row) => row.data.response_time_ms as number)
        .filter((time) => time != null);

      if (responseTimes.length > 0) {
        const avgResponseTime = average(responseTimes);
        console.log(
          ` • Average response time: ${avgResponseTime.toFixed(0)}ms`,
        );
      }

      // Show sample interactions
      console.log("\n📋 Sample interactions:");
      rows.slice(0, 3).forEach((row, index) => {
        console.log(` ${index + 1}. User: "${row.data.prompt}"`);
        console.log(
          ` Response: "${String(row.data.response).substring(0, 80)}..."`,
        );
        console.log(` Satisfaction: ${row.data.satisfaction_score}/5\n`);
      });
    }

    // 7. Get dataset versions (if any exist). A failure here is reported but
    //    does not stop the walkthrough.
    console.log("📚 Checking dataset versions...");
    try {
      const versions = await dataset.getVersions();
      console.log(` • Total versions: ${versions.total}`);

      if (versions.versions.length > 0) {
        console.log(" • Available versions:");
        versions.versions.forEach((version) => {
          console.log(
            ` - ${version.version} (published: ${version.publishedAt})`,
          );
        });
      } else {
        console.log(" • No published versions yet");
      }
    } catch (error) {
      console.log(` ⚠️ Could not retrieve versions: ${describeError(error)}`);
    }

    console.log();

    // 8. Publish the dataset
    console.log("🚀 Publishing dataset...");
    await dataset.publish({
      version: "v1.0",
      description:
        "Initial release of LLM interactions dataset with sample data",
    });

    console.log(
      `✅ Dataset published! Status: ${dataset.published ? "Published" : "Draft"}\n`,
    );

    // 9. List all datasets (to show our new one)
    console.log("📑 Listing all datasets...");
    const datasetsList = await client.datasets.list(1, 5); // First 5 datasets
    console.log(` • Found ${datasetsList.total} total datasets`);
    console.log(" • Recent datasets:");

    datasetsList.datasets.slice(0, 3).forEach((ds, index) => {
      const isOurDataset = ds.id === dataset.id;
      console.log(
        ` ${index + 1}. ${ds.name}${isOurDataset ? " ← (just created!)" : ""}`,
      );
      console.log(` Description: ${ds.description || "No description"}`);
      console.log(` Published: ${ds.published ? "Yes" : "No"}\n`);
    });

    // 10. Demonstrate search functionality
    console.log("🔎 Testing search functionality...");
    const foundDataset = await client.datasets.findByName(dataset.name);
    if (foundDataset) {
      console.log(
        `✅ Found dataset by name: ${foundDataset.name} (ID: ${foundDataset.id})`,
      );
    } else {
      console.log("❌ Could not find dataset by name");
    }

    console.log("\n🎉 Dataset API demonstration completed successfully!");
    console.log("\n💡 Key features demonstrated:");
    console.log(" • Dataset creation and schema definition");
    console.log(" • Real-time data collection from LLM interactions");
    console.log(" • CSV data import capabilities");
    console.log(" • Statistical analysis of collected data");
    console.log(" • Dataset publishing and version management");
    console.log(" • Search and retrieval operations");

    console.log(`\n📊 Dataset Summary:`);
    console.log(` • Name: ${dataset.name}`);
    console.log(` • ID: ${dataset.id}`);
    console.log(` • Published: ${dataset.published ? "Yes" : "No"}`);
    // `stats` was fetched in step 5, before publishing.
    console.log(` • Total interactions recorded: ${stats.rowCount}`);
  } catch (error) {
    console.error("❌ Error in dataset operations:", describeError(error));
    if (error instanceof Error && error.stack) {
      console.error("Stack trace:", error.stack);
    }
    // The error is handled here, so report failure through the exit status.
    process.exitCode = 1;
  }
};
299+
300+
// Error handling for the main function
301+
main().catch((error) => {
  // A rejection reason is not guaranteed to be an Error (it can even be
  // null/undefined), so do not read `.message` unconditionally.
  const message = error instanceof Error ? error.message : String(error);
  console.error("💥 Application failed:", message);
  process.exit(1);
});

0 commit comments

Comments
 (0)