How to build raw interactions table using Data API

This document describes the process of reconstructing the Raw Interactions table using data retrieved from the Data API.

The Data API provides raw data in a normalized format, split across multiple related tables. To recreate the unified Raw Interactions view, these tables must be joined and transformed to align with the structure and semantics of the original dataset.

In this exercise, we download the raw data into an Excel file and perform the necessary transformations within Excel to build the final view.

Step - 1 : Fetching and storing data into CSV files.

The Data API distributes raw data across five key tables: Conversations, Interactions, Plugin Calls, Plugin Resources, and Users.

We use a Python script to fetch data from the past 24 hours and generate an Excel file containing all five tables.

You can copy the script into your environment and execute it. Be sure to replace the placeholder for the API key with your actual Data API credentials before running the script.

Python script

1 import requests
2 import pandas as pd
3 import time
4 import logging
5 from pathlib import Path
6 import re
7 
8 # Config
9 BASE_API = 'https://api.moveworks.ai/export/v1beta2/records'
10 ENDPOINTS = [
11     'interactions',
12     'conversations',
13     'plugin-calls',
14     'plugin-resources',
15     'users'
16 ]
17 ACCESS_TOKEN = '<Access Token>'  # Replace this
18 
19 # Filter time window (applied to all endpoints)
20 TIME_FILTER = "last_updated_time ge '2025-07-15T00:00:00.000Z' and last_updated_time le '2025-07-16T00:00:00.000Z'"
21 
22 # Setup logging
23 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24 
25 # Headers
26 headers = {
27     'Authorization': f'Bearer {ACCESS_TOKEN}',
28 }
29 
30 # Retry config
31 MAX_RETRIES = 5
32 
33 # Function to clean illegal characters
34 def clean_illegal_chars(df):
35     def remove_illegal_chars(value):
36         if isinstance(value, str):
37             return re.sub(r'[�--]', '', value)
38         return value
39     return df.applymap(remove_illegal_chars)
40 
41 # Flatten nested fields
42 def flatten_record(record):
43     flat = record.copy()
44     if isinstance(flat.get("detail"), dict):
45         detail = flat.pop("detail", {}) or {}
46         for key, value in detail.items():
47             if isinstance(value, list):
48                 value = ','.join(map(str, value))
49             flat[f'detail_{key}'] = value
50     return flat
51 
52 # Collect data per endpoint
53 all_dataframes = {}
54 
55 def fetch_data(endpoint):
56     url = f'{BASE_API}/{endpoint}'
57     params = {
58         '$orderby': 'id desc',
59         '$filter': TIME_FILTER,
60     }
61 
62     data = []
63     retries = 0
64     page_count = 0
65 
66     logging.info(f"Fetching data from /{endpoint}")
67 
68     while url:
69         try:
70             logging.info(f"Requesting page {page_count + 1}: {url}")
71             response = requests.get(url, headers=headers, params=params if url.endswith(endpoint) else None)
72 
73             if response.status_code == 200:
74                 json_resp = response.json()
75                 page_data = json_resp.get('value', [])
76                 logging.info(f"Retrieved {len(page_data)} records from page {page_count + 1}")
77 
78                 if not page_data:
79                     break
80 
81                 for record in page_data:
82                     data.append(flatten_record(record))
83 
84                 url = json_resp.get('@odata.nextLink')
85                 retries = 0
86                 page_count += 1
87 
88                 if endpoint == 'users':
89                     time.sleep(2)
90 
91             elif response.status_code == 429:
92                 wait = 90 if endpoint == 'users' else 60
93                 logging.warning(f"Rate limited on /{endpoint}. Waiting for {wait} seconds.")
94                 time.sleep(wait)
95 
96             elif response.status_code in (500, 502, 503, 504):
97                 if retries < MAX_RETRIES:
98                     wait = 2 ** retries
99                     logging.warning(f"Transient error {response.status_code} on /{endpoint}. Retrying in {wait} seconds...")
100                     time.sleep(wait)
101                     retries += 1
102                 else:
103                     logging.error(f"Max retries reached on /{endpoint}. Aborting.")
104                     break
105 
106             else:
107                 logging.error(f"Unexpected error {response.status_code} on /{endpoint}: {response.text}")
108                 break
109 
110         except Exception as e:
111             logging.exception(f"Exception occurred while calling /{endpoint}")
112             break
113 
114     if data:
115         df = pd.DataFrame(data)
116         df = clean_illegal_chars(df)
117         all_dataframes[endpoint] = df
118         logging.info(f"Ingestion complete for /{endpoint}, {len(df)} records.")
119     else:
120         logging.info(f"No data retrieved from /{endpoint}.")
121 
122 # Run ingestion for all endpoints
123 for endpoint in ENDPOINTS:
124     fetch_data(endpoint)
125 
126 # Save to Excel with one sheet per endpoint
127 if all_dataframes:
128     output_file = Path("moveworks_export.xlsx")
129     with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
130         for endpoint, df in all_dataframes.items():
131             sheet_name = endpoint[:31]  # Excel sheet name max limit
132             df.to_excel(writer, sheet_name=sheet_name, index=False)
133     logging.info(f"All data written to {output_file}")
134 else:
135     logging.warning("No data to write to Excel.")

Step - 2 : Modification of the excel file to generate the raw interactions view.

Let’s learn the sequence of columns shown in raw interactions table and understand where this data is stored in Data API

Raw interactions table	Definition	Data API table
Timestamp	This the created time for user interaction	Interactions
Conversation ID	Conversation ID attributed to an interaction stored	Interactions
Conversation Domain	This is detected domain on a conversation level	Conversations
Conversation Topic	All topics aggregated for interactions based on the conversation ID	Interactions
Interaction Type	Type of interaction	Interactions
Interaction content	User message or action information	Interactions
Bot Response	Summarized response provided by the AI Assistant	Interactions
Unsuccessful Plugins	Plugins called by the AI Assistant but were not served and not used	Plugin calls
Plugin Served	Plugins that the reasoner considered and served to the user	Plugin calls
Plugin Used	Plugins that the user interacted or engaged with	Plugin calls
Resource Domain	Resource domain configured during setting up the ingestion.	Plugin resource
No of citations	Total no of citations provided in the summarized response	Plugin resource
Content Item name	Name of the content item used to generate the summarized response	Plugin resource
Content Item ID	External resource ID of the content item	Plugin resource
Ticket type	Indicates who filed the ticket user initiated ticket : ticket filed by the end user as an escalation bot initiated ticket : ticket filed by the AI Assistant as a receipt	Plugin resource
Ticket ID	Unique identifier for a ticket	Plugin resource
Interaction surface	Platform where the interaction took place	Interactions
User Department	Department to which the end user belongs to
User Location	Location to which the end user belongs to
User country	Country to which the end user belongs to
User Preferred language	Default language configured in the AI Assistant	Users

Now that we have identified the columns required to reconstruct the Raw Interactions view, let’s begin working through the raw data and progressively merge the interactions and conversations tables to form a unified view. Please note that we will be using Excel functions throughout this process.

Bring in the base columns — Actor, Conversation ID, Created Time, Interaction ID, Interaction Type

a. From the interactions sheet, extract the relevant details into a new sheet. We are only interested in user-led interactions.

Use the following formula to filter this data:

=FILTER(interactions!A:D, interactions!A:A="user")
Using the Conversation ID, fetch Conversation Domain and Conversation Topic

a. To fetch the Conversation Domain, use the VLOOKUP function to pull data from the conversations sheet:

=VLOOKUP(B4,conversations!B:E,3,FALSE)

b. To recreate the Conversation Topic, aggregate all topics detected at the interaction level and replicate them for each interaction.

Use this formula:

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER(interactions!P:P, interactions!B:B= B2)))
Fetch additional content fields from the Interactions table

a. To extract Interaction Content, pull values from detail_content, detail_detail, or similar columns, and combine them based on data availability. Use the following nested IF structure:

=IF(INDEX(interactions!K:K, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!K:K, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!N:N, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!N:N, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!L:L, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!L:L, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!Q:Q, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!Q:Q, MATCH(D1294, interactions!D:D, 0)), ""))))

b. To retrieve the Bot Response corresponding to each user interaction, use the parent interaction ID to index into bot-led interactions:

=TEXTJOIN(CHAR(10), TRUE, FILTER(interactions!K:K, interactions!G:G=D2))

There can be multiple bot interactions pointing to a single user interaction. Join these interactions to form a bot response similar to raw interactions table.
Determine plugins involved in an interaction using the Plugin Calls table

Retrieve three plugin-related columns using the formulas below:

a. Unsuccessful plugin:

=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =FALSE) *('plugin-calls'!I:I=FALSE)))

b. Plugin served:

=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =TRUE)))

c. Plugin used:

=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =TRUE) *('plugin-calls'!I:I=TRUE)))
Add plugin resource information

Use the plugin-resources table to fetch citation and content-related metadata:

a. Resource Domain: Indicates the configured domain for knowledge or ticket-based interactions.

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!K:K, 'plugin-resources'!E:E = D2)))

b. Number of Citations: Counts distinct resources cited per interaction.

=IFERROR(COUNTA(UNIQUE(FILTER('plugin-resources'!D:D, 'plugin-resources'!E:E = D2))), 0)

Note: Even when there’s a #CALC! error, Excel may return 1. Use the Content Item ID column to clean up any incorrect entries.

c. Content Item ID:

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!D:D, 'plugin-resources'!E:E = D2)))

Filter out rows returning #CALC!. For such rows, mark No of Citations as 1 and Content Item ID as “Nil”.

d. Content Item Name:

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!M:M, 'plugin-resources'!E:E = D2)))
Populate ticketing information (only applicable when a ticket is created)

a. Ticket Type: Indicates whether a ticket was user-initiated or bot-initiated.

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!S:S, 'plugin-resources'!E:E = D2)))

b. Ticket ID: Filter the values to include only rows where the resource type is “ticket”.

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!H:H,('plugin-resources'!E:E = D4)*('plugin-resources'!I:I="RESOURCE_TYPE_TICKET"))))
Fetch the platform where the interaction took place

This can be extracted from the interactions sheet based on Interaction ID:

=TEXTJOIN(", ", TRUE, UNIQUE(FILTER(interactions!H:H, interactions!D:D = D2)))
Get the user’s preferred language from the Users table

Use this formula to fetch preferred language based on user ID:

=TEXTJOIN(", ",TRUE,UNIQUE(FILTER(users!J:J,users!F:F=F3)))

Additional Notes:

Fields like User Department, User Location, and User Country are not directly available from the Data API. These attributes must be sourced from your organization’s external identity systems.

The User API provides an external ID which can be used to enrich user metadata by integrating with external identity providers such as Okta. You can refer to Okta List Users for details. Once this data is pulled from the external system, join it using the external ID to populate the missing attributes.

While the modifications shown here are demonstrated in Excel, we are also providing a Python script that can directly generate a raw interactions table based on the time input specified in the script

To run a Python file, first install Python from python.org. On Mac, open Terminal, navigate to your file’s folder, and run python3 script.py. On Windows, open Command Prompt, go to the file’s folder, and run python script.py.

1 import requests
2 import pandas as pd
3 import time
4 import logging
5 from pathlib import Path
6 import re
7 from datetime import datetime, timedelta
8 
9 # Config
10 BASE_API = 'https://api.moveworks.ai/export/v1beta2/records'
11 ENDPOINTS = [
12     'interactions',
13     'conversations',
14     'plugin-calls',
15     'plugin-resources',
16     'users'
17 ]
18 ACCESS_TOKEN = '<Access Token>'  # Replace this
19 
20 # Time filter will be set dynamically based on user input
21 
22 # Setup logging
23 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
24 
25 # Headers
26 headers = {
27     'Authorization': f'Bearer {ACCESS_TOKEN}',
28 }
29 
30 # Retry config
31 MAX_RETRIES = 5
32 
33 def get_user_date_inputs():
34     """Get start and end dates from user input and convert to timestamp format"""
35     print("\n" + "="*50)
36     print("RAW INTERACTIONS TABLE BUILDER")
37     print("="*50)
38     print("Enter the date range for data extraction:")
39     print("Date format: YYYY-MM-DD (e.g., 2025-07-15)")
40     print("-"*50)
41 
42     while True:
43         try:
44             start_date_str = input("Enter start date (YYYY-MM-DD): ").strip()
45             start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
46             break
47         except ValueError:
48             print("❌ Invalid date format. Please use YYYY-MM-DD format (e.g., 2025-07-15)")
49 
50     while True:
51         try:
52             end_date_str = input("Enter end date (YYYY-MM-DD): ").strip()
53             end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
54 
55             if end_date < start_date:
56                 print("❌ End date cannot be earlier than start date. Please try again.")
57                 continue
58             break
59         except ValueError:
60             print("❌ Invalid date format. Please use YYYY-MM-DD format (e.g., 2025-07-15)")
61 
62     # Convert to timestamp format for API
63     start_timestamp = start_date.strftime("%Y-%m-%dT00:00:00.000Z")
64     # For end date, set to end of day
65     end_timestamp = (end_date + timedelta(days=1) - timedelta(microseconds=1)).strftime("%Y-%m-%dT23:59:59.999Z")
66 
67     time_filter = f"last_updated_time ge '{start_timestamp}' and last_updated_time le '{end_timestamp}'"
68 
69     print(f"\n✅ Date range set:")
70     print(f"   Start: {start_date_str} 00:00:00 UTC")
71     print(f"   End:   {end_date_str} 23:59:59 UTC")
72     print(f"   Filter: {time_filter}")
73     print("-"*50)
74 
75     return time_filter, start_date_str, end_date_str
76 
77 def clean_illegal_chars(df):
78     """Clean illegal characters from DataFrame"""
79     def remove_illegal_chars(value):
80         if isinstance(value, str):
81             return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', value)
82         return value
83     return df.applymap(remove_illegal_chars)
84 
85 def flatten_record(record):
86     """Flatten nested fields in record"""
87     flat = record.copy()
88     if isinstance(flat.get("detail"), dict):
89         detail = flat.pop("detail", {}) or {}
90         for key, value in detail.items():
91             if isinstance(value, list):
92                 value = ','.join(map(str, value))
93             flat[f'detail_{key}'] = value
94     return flat
95 
96 def fetch_data(endpoint, time_filter): # Added time_filter parameter
97     """Fetch data from a specific endpoint"""
98     url = f'{BASE_API}/{endpoint}'
99     params = {
100         '$orderby': 'id desc',
101         '$filter': time_filter, # Using the passed time_filter
102     }
103 
104     data = []
105     retries = 0
106     page_count = 0
107 
108     logging.info(f"Fetching data from /{endpoint}")
109 
110     while url:
111         try:
112             logging.info(f"Requesting page {page_count + 1}: {url}")
113             response = requests.get(url, headers=headers, params=params if url.endswith(endpoint) else None)
114 
115             if response.status_code == 200:
116                 json_resp = response.json()
117                 page_data = json_resp.get('value', [])
118                 logging.info(f"Retrieved {len(page_data)} records from page {page_count + 1}")
119 
120                 if not page_data:
121                     break
122 
123                 for record in page_data:
124                     data.append(flatten_record(record))
125 
126                 url = json_resp.get('@odata.nextLink')
127                 retries = 0
128                 page_count += 1
129 
130                 if endpoint == 'users':
131                     time.sleep(2)
132 
133             elif response.status_code == 429:
134                 wait = 90 if endpoint == 'users' else 60
135                 logging.warning(f"Rate limited on /{endpoint}. Waiting for {wait} seconds.")
136                 time.sleep(wait)
137 
138             elif response.status_code in (500, 502, 503, 504):
139                 if retries < MAX_RETRIES:
140                     wait = 2 ** retries
141                     logging.warning(f"Transient error {response.status_code} on /{endpoint}. Retrying in {wait} seconds...")
142                     time.sleep(wait)
143                     retries += 1
144                 else:
145                     logging.error(f"Max retries reached on /{endpoint}. Aborting.")
146                     break
147 
148             else:
149                 logging.error(f"Unexpected error {response.status_code} on /{endpoint}: {response.text}")
150                 break
151 
152         except Exception as e:
153             logging.exception(f"Exception occurred while calling /{endpoint}")
154             break
155 
156     if data:
157         df = pd.DataFrame(data)
158         df = clean_illegal_chars(df)
159         logging.info(f"Ingestion complete for /{endpoint}, {len(df)} records.")
160         return df
161     else:
162         logging.info(f"No data retrieved from /{endpoint}.")
163         return pd.DataFrame()
164 
165 def get_conversation_topics(interactions_df, conversation_id):
166     """Get all entities for a conversation ID"""
167     entities = interactions_df[interactions_df['conversation_id'] == conversation_id]['detail_entity'].dropna().unique()
168     return ', '.join([str(e) for e in entities if str(e) != 'nan'])
169 
170 def get_interaction_content(row, interactions_df):
171     """Extract interaction content from detail fields"""
172     interaction_id = row['id']
173     interaction_row = interactions_df[interactions_df['id'] == interaction_id]
174 
175     if interaction_row.empty:
176         return ""
177 
178     interaction_row = interaction_row.iloc[0]
179 
180     # Based on API example, only use fields that actually exist
181     # From the example: detail.content is the main field for user message content
182     if 'detail_content' in interaction_row and pd.notna(interaction_row['detail_content']) and str(interaction_row['detail_content']) != '':
183         return str(interaction_row['detail_content'])
184 
185     return ""
186 
187 def get_bot_response(row, interactions_df):
188     """Get bot response using parent interaction ID"""
189     interaction_id = row['id']
190 
191     # Find bot interactions with this interaction as parent
192     bot_response = interactions_df[
193         (interactions_df['parent_interaction_id'] == interaction_id) &
194         (interactions_df['actor'] == 'bot')
195     ]
196 
197     if not bot_response.empty:
198         bot_row = bot_response.iloc[0]
199         # Only use the content field that actually exists in the API
200         if 'detail_content' in bot_row and pd.notna(bot_row['detail_content']) and str(bot_row['detail_content']) != '':
201             return str(bot_row['detail_content'])
202 
203     return ""
204 
205 def get_plugin_info(interaction_id, plugin_calls_df, plugin_type):
206     """Get plugin information based on type (unsuccessful, served, used)"""
207     if plugin_calls_df.empty:
208         return ""
209 
210     interaction_plugins = plugin_calls_df[plugin_calls_df['interaction_id'] == interaction_id]
211 
212     if interaction_plugins.empty:
213         return ""
214 
215     if plugin_type == 'unsuccessful':
216         # Plugins that were not served and not used
217         plugins = interaction_plugins[
218             (interaction_plugins['served'] == False) &
219             (interaction_plugins['used'] == False)
220         ]['plugin_name'].unique()
221     elif plugin_type == 'served':
222         # Plugins that were served
223         plugins = interaction_plugins[
224             interaction_plugins['served'] == True
225         ]['plugin_name'].unique()
226     elif plugin_type == 'used':
227         # Plugins that were served and used
228         plugins = interaction_plugins[
229             (interaction_plugins['served'] == True) &
230             (interaction_plugins['used'] == True)
231         ]['plugin_name'].unique()
232     else:
233         return ""
234 
235     return ', '.join([str(p) for p in plugins if str(p) != 'nan'])
236 
237 def get_resource_info(interaction_id, plugin_resources_df, info_type):
238     """Get resource information (domain, citations, content items, tickets)"""
239     if plugin_resources_df.empty:
240         return "" if info_type != 'citation_count' else 0
241 
242     interaction_resources = plugin_resources_df[plugin_resources_df['interaction_id'] == interaction_id]
243 
244     if interaction_resources.empty:
245         return "" if info_type != 'citation_count' else 0
246 
247     if info_type == 'domain':
248         domains = interaction_resources['detail_domain'].dropna().unique()
249         return ', '.join([str(d) for d in domains if str(d) != 'nan'])
250 
251     elif info_type == 'citation_count':
252         unique_resources = interaction_resources['resource_id'].dropna().nunique()
253         return unique_resources
254 
255     elif info_type == 'content_item_id':
256         # Use external_resource_id from detail field
257         content_ids = interaction_resources['detail_external_resource_id'].dropna().unique()
258         return ', '.join([str(c) for c in content_ids if str(c) != 'nan'])
259 
260     elif info_type == 'content_item_name':
261         content_names = interaction_resources['detail_name'].dropna().unique()
262         return ', '.join([str(c) for c in content_names if str(c) != 'nan'])
263 
264     elif info_type == 'ticket_type':
265         # Need to determine ticket type logic based on your business rules
266         # This might need adjustment based on how you determine user vs bot initiated
267         ticket_resources = interaction_resources[ticket_resources['type'] == 'RESOURCE_TYPE_TICKET']
268         if not ticket_resources.empty:
269             return "user initiated ticket"  # Default, adjust logic as needed
270         return ""
271 
272     elif info_type == 'ticket_id':
273         ticket_resources = interaction_resources[interaction_resources['type'] == 'RESOURCE_TYPE_TICKET']
274         ticket_ids = ticket_resources['detail_external_resource_id'].dropna().unique()
275         return ', '.join([str(t) for t in ticket_ids if str(t) != 'nan'])
276 
277     return ""
278 
279 def get_interaction_surface(interaction_id, interactions_df):
280     """Get the platform/surface where interaction took place"""
281     interaction_row = interactions_df[interactions_df['id'] == interaction_id]
282 
283     if not interaction_row.empty:
284         # Use 'platform' field from the API response
285         surface = interaction_row.iloc[0].get('platform', '')
286         return str(surface) if pd.notna(surface) else ""
287 
288     return ""
289 
290 def get_user_language(user_id, users_df):
291     """Get user's preferred language"""
292     if users_df.empty:
293         return ""
294 
295     # Use the actual field name from API response
296     user_row = users_df[users_df['id'] == user_id]
297 
298     if not user_row.empty:
299         language = user_row.iloc[0].get('user_preferred_language', '')
300         return str(language) if pd.notna(language) else ""
301 
302     return ""
303 
304 def build_raw_interactions_table(all_dataframes):
305     """Build the final raw interactions table"""
306     logging.info("Building raw interactions table...")
307 
308     # Extract dataframes
309     interactions_df = all_dataframes.get('interactions', pd.DataFrame())
310     conversations_df = all_dataframes.get('conversations', pd.DataFrame())
311     plugin_calls_df = all_dataframes.get('plugin-calls', pd.DataFrame())
312     plugin_resources_df = all_dataframes.get('plugin-resources', pd.DataFrame())
313     users_df = all_dataframes.get('users', pd.DataFrame())
314 
315     if interactions_df.empty:
316         logging.error("No interactions data available")
317         return pd.DataFrame()
318 
319     # Filter for user-led interactions only
320     user_interactions = interactions_df[interactions_df['actor'] == 'user'].copy()
321 
322     if user_interactions.empty:
323         logging.warning("No user interactions found")
324         return pd.DataFrame()
325 
326     logging.info(f"Processing {len(user_interactions)} user interactions...")
327 
328     # Initialize the raw interactions table
329     raw_interactions = pd.DataFrame()
330 
331     # Base columns from interactions
332     raw_interactions['Timestamp'] = user_interactions['created_time']
333     raw_interactions['Conversation ID'] = user_interactions['conversation_id']
334     raw_interactions['Interaction ID'] = user_interactions['id']
335     raw_interactions['Interaction Type'] = user_interactions['type']  # Changed from 'interaction_type'
336 
337     # Get conversation domain from conversations table
338     if not conversations_df.empty:
339         conversation_domain_map = conversations_df.set_index('id')['primary_domain'].to_dict()  # Changed from 'domain'
340         raw_interactions['Conversation Domain'] = raw_interactions['Conversation ID'].map(conversation_domain_map)
341     else:
342         raw_interactions['Conversation Domain'] = ""
343 
344     # Get conversation topics (aggregated for each conversation)
345     logging.info("Processing conversation entities...")
346     raw_interactions['Conversation Topic'] = raw_interactions['Conversation ID'].apply(
347         lambda x: get_conversation_topics(interactions_df, x)
348     )
349 
350     # Get interaction content and bot response
351     logging.info("Processing interaction content and bot responses...")
352     raw_interactions['Interaction Content'] = user_interactions.apply(
353         lambda row: get_interaction_content(row, interactions_df), axis=1
354     )
355 
356     raw_interactions['Bot Response'] = user_interactions.apply(
357         lambda row: get_bot_response(row, interactions_df), axis=1
358     )
359 
360     # Get plugin information
361     logging.info("Processing plugin information...")
362     raw_interactions['Unsuccessful Plugins'] = raw_interactions['Interaction ID'].apply(
363         lambda x: get_plugin_info(x, plugin_calls_df, 'unsuccessful')
364     )
365 
366     raw_interactions['Plugin Served'] = raw_interactions['Interaction ID'].apply(
367         lambda x: get_plugin_info(x, plugin_calls_df, 'served')
368     )
369 
370     raw_interactions['Plugin Used'] = raw_interactions['Interaction ID'].apply(
371         lambda x: get_plugin_info(x, plugin_calls_df, 'used')
372     )
373 
374     # Get resource information
375     logging.info("Processing resource information...")
376     raw_interactions['Resource Domain'] = raw_interactions['Interaction ID'].apply(
377         lambda x: get_resource_info(x, plugin_resources_df, 'domain')
378     )
379 
380     raw_interactions['No of Citations'] = raw_interactions['Interaction ID'].apply(
381         lambda x: get_resource_info(x, plugin_resources_df, 'citation_count')
382     )
383 
384     raw_interactions['Content Item Name'] = raw_interactions['Interaction ID'].apply(
385         lambda x: get_resource_info(x, plugin_resources_df, 'content_item_name')
386     )
387 
388     raw_interactions['Content Item ID'] = raw_interactions['Interaction ID'].apply(
389         lambda x: get_resource_info(x, plugin_resources_df, 'content_item_id')
390     )
391 
392     raw_interactions['Ticket Type'] = raw_interactions['Interaction ID'].apply(
393         lambda x: get_resource_info(x, plugin_resources_df, 'ticket_type')
394     )
395 
396     raw_interactions['Ticket ID'] = raw_interactions['Interaction ID'].apply(
397         lambda x: get_resource_info(x, plugin_resources_df, 'ticket_id')
398     )
399 
400     # Get interaction surface
401     raw_interactions['Interaction Surface'] = raw_interactions['Interaction ID'].apply(
402         lambda x: get_interaction_surface(x, interactions_df)
403     )
404 
405     # Get user information
406     if 'user_id' in user_interactions.columns:
407         logging.info("Processing user information...")
408         raw_interactions['User Preferred Language'] = user_interactions['user_id'].apply(
409             lambda x: get_user_language(x, users_df)
410         )
411     else:
412         raw_interactions['User Preferred Language'] = ""
413 
414     # Add placeholder columns for external user data
415     raw_interactions['User Department'] = ""  # To be populated from external identity system
416     raw_interactions['User Location'] = ""    # To be populated from external identity system
417     raw_interactions['User Country'] = ""     # To be populated from external identity system
418 
419     logging.info(f"Raw interactions table built with {len(raw_interactions)} rows and {len(raw_interactions.columns)} columns")
420 
421     return raw_interactions
422 
423 def main():
424     """Main function to orchestrate the entire process"""
425     # Get user input for date range
426     time_filter, start_date_str, end_date_str = get_user_date_inputs()
427 
428     logging.info("Starting raw interactions table builder...")
429 
430     # Step 1: Fetch all data
431     all_dataframes = {}
432 
433     for endpoint in ENDPOINTS:
434         df = fetch_data(endpoint, time_filter)
435         if not df.empty:
436             all_dataframes[endpoint] = df
437 
438     if not all_dataframes:
439         logging.error("No data retrieved from any endpoint")
440         return
441 
442     # Step 2: Build raw interactions table
443     raw_interactions_table = build_raw_interactions_table(all_dataframes)
444 
445     if raw_interactions_table.empty:
446         logging.error("Failed to build raw interactions table")
447         return
448 
449     # Step 3: Save results
450     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
451 
452     # Save raw data to Excel (optional)
453     raw_data_file = Path(f"moveworks_raw_data_{start_date_str}_to_{end_date_str}_{timestamp}.xlsx")
454     with pd.ExcelWriter(raw_data_file, engine='openynxl') as writer:
455         for endpoint, df in all_dataframes.items():
456             sheet_name = endpoint[:31]  # Excel sheet name max limit
457             df.to_excel(writer, sheet_name=sheet_name, index=False)
458     logging.info(f"Raw data saved to {raw_data_file}")
459 
460     # Save final raw interactions table
461     final_file = Path(f"raw_interactions_table_{start_date_str}_to_{end_date_str}_{timestamp}.xlsx")
462     raw_interactions_table.to_excel(final_file, index=False, sheet_name='Raw Interactions')
463     logging.info(f"Final raw interactions table saved to {final_file}")
464 
465     # Also save as CSV for easier processing
466     csv_file = Path(f"raw_interactions_table_{start_date_str}_to_{end_date_str}_{timestamp}.csv")
467     raw_interactions_table.to_csv(csv_file, index=False)
468     logging.info(f"Final raw interactions table saved to {csv_file}")
469 
470     # Print summary
471     print("\n" + "="*50)
472     print("RAW INTERACTIONS TABLE SUMMARY")
473     print("="*50)
474     print(f"Total interactions processed: {len(raw_interactions_table)}")
475     print(f"Date range: {start_date_str} to {end_date_str}")
476     print(f"Time filter: {time_filter}")
477     print(f"Columns: {list(raw_interactions_table.columns)}")
478     print(f"Files generated:")
479     print(f"  - {raw_data_file} (raw data)")
480     print(f"  - {final_file} (final table - Excel)")
481     print(f"  - {csv_file} (final table - CSV)")
482 
483     # Show sample data
484     if len(raw_interactions_table) > 0:
485         print(f"\nSample data (first 3 rows):")
486         print(raw_interactions_table.head(3).to_string())
487 
488 if __name__ == "__main__":
489     main()