1
+ import * as traceloop from "@traceloop/node-server-sdk" ;
2
+ import OpenAI from "openai" ;
3
+
4
/**
 * Runs the Dataset API sample end to end.
 *
 * Steps, in order: initialize the Traceloop SDK, create a dataset, add eight
 * columns, send five sample prompts to OpenAI and store one row per prompt,
 * import three rows from an inline CSV, print statistics and a small analysis
 * of the first rows, list versions, publish, list datasets and look the new
 * dataset up by name.
 *
 * Reads TRACELOOP_API_KEY and OPENAI_API_KEY from the environment.
 *
 * Failure handling:
 * - An OpenAI failure for a single prompt is logged and recorded as an error
 *   row; the run continues with the next prompt.
 * - Any dataset operation failure aborts the remaining steps, is logged, and
 *   marks the process as failed through `process.exitCode`.
 *
 * @returns {Promise<void>} Resolves when the demo finishes or has been aborted.
 */
const main = async () => {
  const MODEL = "gpt-3.5-turbo";

  // Initialize Traceloop SDK
  traceloop.initialize({
    appName: "sample_dataset",
    apiKey: process.env.TRACELOOP_API_KEY,
    disableBatch: true,
    traceloopSyncEnabled: true,
  });

  await traceloop.waitForInitialization();

  const client = traceloop.getClient();
  if (!client) {
    console.error("Failed to initialize Traceloop client");
    // Nothing can run without a client, so report the run as failed.
    process.exitCode = 1;
    return;
  }

  console.log("🚀 Dataset API Sample Application");
  console.log("==================================\n");

  try {
    // 1. Create a new dataset for tracking LLM interactions
    console.log("📝 Creating a new dataset...");
    const dataset = await client.datasets.create({
      // The timestamp suffix gives every run a distinct name; the name must not
      // carry trailing whitespace because it is looked up again in step 10.
      name: `llm-interactions-${Date.now()}`,
      description: "Dataset for tracking OpenAI chat completions and user interactions",
    });

    console.log(`✅ Dataset created: ${dataset.name} (ID: ${dataset.id} )\n`);

    // 2. Define the schema by adding columns (sequentially, in display order)
    console.log("🏗️ Adding columns to define schema...");

    await dataset.addColumn({
      name: "user_id",
      type: "string",
      required: true,
      description: "Unique identifier for the user",
    });

    await dataset.addColumn({
      name: "prompt",
      type: "string",
      required: true,
      description: "The user's input prompt",
    });

    await dataset.addColumn({
      name: "response",
      type: "string",
      required: true,
      description: "The AI model's response",
    });

    await dataset.addColumn({
      name: "model",
      type: "string",
      required: true,
      description: "The AI model used (e.g., gpt-4)",
    });

    await dataset.addColumn({
      name: "tokens_used",
      type: "number",
      required: false,
      description: "Total tokens consumed",
    });

    await dataset.addColumn({
      name: "response_time_ms",
      type: "number",
      required: false,
      description: "Response time in milliseconds",
    });

    await dataset.addColumn({
      name: "satisfaction_score",
      type: "number",
      required: false,
      description: "User satisfaction rating (1-5)",
    });

    await dataset.addColumn({
      name: "timestamp",
      type: "string",
      required: true,
      description: "When the interaction occurred",
    });

    console.log("✅ Schema defined with 8 columns\n");

    // 3. Simulate some LLM interactions and collect data
    console.log("🤖 Simulating LLM interactions...");

    const openai = new OpenAI({
      apiKey: process.env.OPENAI_API_KEY,
    });

    const samplePrompts = [
      "Explain machine learning in simple terms",
      "Write a Python function to calculate fibonacci numbers",
      "What are the benefits of using TypeScript?",
      "How does async/await work in JavaScript?",
      "Explain the concept of closures in programming",
    ];

    const interactions = [];

    for (let i = 0; i < samplePrompts.length; i++) {
      const prompt = samplePrompts[i];
      const userId = `user_${String(i + 1).padStart(3, "0")}`;

      console.log(` Processing prompt ${i + 1} /${samplePrompts.length} ...`);

      const startTime = Date.now();

      // Defaults describe a failed call; the success path overwrites them.
      let response = "No response";
      let tokensUsed = 0;
      let satisfactionScore = 1;
      let responseTimeMs = 0;

      // Only the OpenAI call is guarded here. A failing dataset write must not
      // be reported as a prompt error nor retried as a second row.
      try {
        const completion = await openai.chat.completions.create({
          model: MODEL,
          messages: [{ role: "user", content: prompt }],
          max_tokens: 150,
        });

        responseTimeMs = Date.now() - startTime;
        // `||` on purpose: an empty completion is also shown as "No response".
        response = completion.choices[0]?.message?.content || "No response";
        tokensUsed = completion.usage?.total_tokens ?? 0;
        satisfactionScore = Math.floor(Math.random() * 5) + 1; // Random satisfaction 1-5
      } catch (error) {
        responseTimeMs = Date.now() - startTime;
        // Thrown values are not guaranteed to be Error instances.
        const reason = error instanceof Error ? error.message : String(error);
        console.log(` ⚠️ Error with prompt ${i + 1} : ${reason} `);
        response = `Error: ${reason}`;
      }

      const interaction = {
        user_id: userId,
        prompt: prompt,
        response: response,
        model: MODEL,
        tokens_used: tokensUsed,
        response_time_ms: responseTimeMs,
        satisfaction_score: satisfactionScore,
        timestamp: new Date().toISOString(),
      };

      // Count the interaction only once it has actually been stored, so the
      // summary below reflects rows that exist in the dataset.
      await dataset.addRow(interaction);
      interactions.push(interaction);
    }

    console.log(`✅ Added ${interactions.length} interaction records\n`);

    // 4. Import additional data from CSV
    console.log("📊 Importing additional data from CSV...");

    const csvData = `user_id,prompt,response,model,tokens_used,response_time_ms,satisfaction_score,timestamp
user_006,"What is React?","React is a JavaScript library for building user interfaces...","gpt-3.5-turbo",85,1200,4,"2024-01-15T10:30:00Z"
user_007,"Explain Docker","Docker is a containerization platform that allows you to package applications...","gpt-3.5-turbo",120,1500,5,"2024-01-15T10:35:00Z"
user_008,"What is GraphQL?","GraphQL is a query language and runtime for APIs...","gpt-3.5-turbo",95,1100,4,"2024-01-15T10:40:00Z"`;

    await dataset.fromCSV(csvData, { hasHeader: true });
    console.log("✅ Imported 3 additional records from CSV\n");

    // 5. Get dataset statistics
    console.log("📈 Getting dataset statistics...");
    const stats = await dataset.getStats();
    console.log(` • Total rows: ${stats.rowCount} `);
    console.log(` • Total columns: ${stats.columnCount} `);
    console.log(` • Dataset size: ${stats.size} bytes`);
    console.log(` • Last modified: ${stats.lastModified} \n`);

    // 6. Retrieve and analyze some data
    console.log("🔍 Analyzing collected data...");
    const rows = await dataset.getRows(10); // Get first 10 rows

    if (rows.length > 0) {
      console.log(` • Retrieved ${rows.length} rows`);

      // Calculate average satisfaction score.
      // NOTE(review): cell values are coerced with Number() and non-finite
      // results dropped, in case the API returns numeric cells as strings
      // (e.g. for CSV-imported rows) — confirm against the SDK's row type.
      const satisfactionScores = rows
        .map((row) => row.data.satisfaction_score)
        .filter((score) => score != null)
        .map(Number)
        .filter(Number.isFinite);

      if (satisfactionScores.length > 0) {
        const avgSatisfaction =
          satisfactionScores.reduce((sum, score) => sum + score, 0) / satisfactionScores.length;
        console.log(` • Average satisfaction score: ${avgSatisfaction.toFixed(2)} /5`);
      }

      // Calculate average response time (same coercion as above)
      const responseTimes = rows
        .map((row) => row.data.response_time_ms)
        .filter((time) => time != null)
        .map(Number)
        .filter(Number.isFinite);

      if (responseTimes.length > 0) {
        const avgResponseTime =
          responseTimes.reduce((sum, time) => sum + time, 0) / responseTimes.length;
        console.log(` • Average response time: ${avgResponseTime.toFixed(0)} ms`);
      }

      // Show sample interactions
      console.log("\n📋 Sample interactions:");
      rows.slice(0, 3).forEach((row, index) => {
        console.log(` ${index + 1} . User: "${row.data.prompt} "`);
        console.log(` Response: "${String(row.data.response).substring(0, 80)} ..."`);
        console.log(` Satisfaction: ${row.data.satisfaction_score} /5\n`);
      });
    }

    // 7. Get dataset versions (if any exist)
    console.log("📚 Checking dataset versions...");
    try {
      const versions = await dataset.getVersions();
      console.log(` • Total versions: ${versions.total} `);

      if (versions.versions.length > 0) {
        console.log(" • Available versions:");
        for (const version of versions.versions) {
          console.log(` - ${version.version} (published: ${version.publishedAt} )`);
        }
      } else {
        console.log(" • No published versions yet");
      }
    } catch (error) {
      // Listing versions is best-effort; the demo continues without it.
      const reason = error instanceof Error ? error.message : String(error);
      console.log(` ⚠️ Could not retrieve versions: ${reason} `);
    }

    console.log();

    // 8. Publish the dataset
    console.log("🚀 Publishing dataset...");
    await dataset.publish({
      version: "v1.0",
      description: "Initial release of LLM interactions dataset with sample data",
    });

    console.log(`✅ Dataset published! Status: ${dataset.published ? "Published" : "Draft"} \n`);

    // 9. List all datasets (to show our new one)
    console.log("📑 Listing all datasets...");
    const datasetsList = await client.datasets.list(1, 5); // First 5 datasets
    console.log(` • Found ${datasetsList.total} total datasets`);
    console.log(" • Recent datasets:");

    datasetsList.datasets.slice(0, 3).forEach((ds, index) => {
      const isOurDataset = ds.id === dataset.id;
      console.log(` ${index + 1} . ${ds.name} ${isOurDataset ? " ← (just created!)" : ""} `);
      console.log(` Description: ${ds.description || "No description"} `);
      console.log(` Published: ${ds.published ? "Yes" : "No"} \n`);
    });

    // 10. Demonstrate search functionality
    console.log("🔎 Testing search functionality...");
    const foundDataset = await client.datasets.findByName(dataset.name);
    if (foundDataset) {
      console.log(`✅ Found dataset by name: ${foundDataset.name} (ID: ${foundDataset.id} )`);
    } else {
      console.log("❌ Could not find dataset by name");
    }

    console.log("\n🎉 Dataset API demonstration completed successfully!");
    console.log("\n💡 Key features demonstrated:");
    console.log(" • Dataset creation and schema definition");
    console.log(" • Real-time data collection from LLM interactions");
    console.log(" • CSV data import capabilities");
    console.log(" • Statistical analysis of collected data");
    console.log(" • Dataset publishing and version management");
    console.log(" • Search and retrieval operations");

    console.log(`\n📊 Dataset Summary:`);
    console.log(` • Name: ${dataset.name} `);
    console.log(` • ID: ${dataset.id} `);
    console.log(` • Published: ${dataset.published ? "Yes" : "No"} `);
    // Row count as reported by getStats() in step 5.
    console.log(` • Total interactions recorded: ${stats.rowCount} `);
  } catch (error) {
    if (error instanceof Error) {
      console.error("❌ Error in dataset operations:", error.message);
      if (error.stack) {
        console.error("Stack trace:", error.stack);
      }
    } else {
      console.error("❌ Error in dataset operations:", String(error));
    }
    // The error is handled here, so main() resolves normally; without this the
    // process would exit with status 0 even though the demo failed.
    process.exitCode = 1;
  }
};
299
+
300
// Entry point: run the demo; if main() rejects, report the failure and
// terminate with a non-zero exit status.
main().then(undefined, (failure) => {
  console.error("💥 Application failed:", failure.message);
  process.exit(1);
});
0 commit comments