--- title: How to build raw interactions table using Data API deprecated: false hidden: false link: new_tab: false metadata: robots: index --- This document describes the process of reconstructing the Raw Interactions table using data retrieved from the Data API. The Data API provides raw data in a normalized format, split across multiple related tables. To recreate the unified Raw Interactions view, these tables must be joined and transformed to align with the structure and semantics of the original dataset. In this exercise, we download the raw data into an Excel file and perform the necessary transformations within Excel to build the final view. ## **Step - 1 : Fetching and storing data into CSV files.** The Data API distributes raw data across five key tables: Conversations, Interactions, Plugin Calls, Plugin Resources, and Users. We use a Python script to fetch data from the past 24 hours and generate an Excel file containing all five tables. You can copy the script into your environment and execute it. Be sure to replace the placeholder for the API key with your actual Data API credentials before running the script. ![](https://files.readme.io/404d00aab8df2a39bde7e44f999dadcaab10d66996f056a1a2c743a9fc556b57-accesstoken.png) ### Python script ```python import requests import pandas as pd import time import logging from pathlib import Path import re # Config BASE_API = 'https://api.moveworks.ai/export/v1beta2/records' ENDPOINTS = [ 'interactions', 'conversations', 'plugin-calls', 'plugin-resources', 'users' ] ACCESS_TOKEN = '' # Replace this # Filter time window (applied to all endpoints) TIME_FILTER = "last_updated_time ge '2025-07-15T00:00:00.000Z' and last_updated_time le '2025-07-16T00:00:00.000Z'" # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Headers headers = { 'Authorization': f'Bearer {ACCESS_TOKEN}', } # Retry config MAX_RETRIES = 5 # Function to clean illegal characters def clean_illegal_chars(df): def remove_illegal_chars(value): if isinstance(value, str): return re.sub(r'[�- -]', '', value) return value return df.applymap(remove_illegal_chars) # Flatten nested fields def flatten_record(record): flat = record.copy() if isinstance(flat.get("detail"), dict): detail = flat.pop("detail", {}) or {} for key, value in detail.items(): if isinstance(value, list): value = ','.join(map(str, value)) flat[f'detail_{key}'] = value return flat # Collect data per endpoint all_dataframes = {} def fetch_data(endpoint): url = f'{BASE_API}/{endpoint}' params = { '$orderby': 'id desc', '$filter': TIME_FILTER, } data = [] retries = 0 page_count = 0 logging.info(f"Fetching data from /{endpoint}") while url: try: logging.info(f"Requesting page {page_count + 1}: {url}") response = requests.get(url, headers=headers, params=params if url.endswith(endpoint) else None) if response.status_code == 200: json_resp = response.json() page_data = json_resp.get('value', []) logging.info(f"Retrieved {len(page_data)} records from page {page_count + 1}") if not page_data: break for record in page_data: data.append(flatten_record(record)) url = json_resp.get('@odata.nextLink') retries = 0 page_count += 1 if endpoint == 'users': time.sleep(2) elif response.status_code == 429: wait = 90 if endpoint == 'users' else 60 logging.warning(f"Rate limited on /{endpoint}. Waiting for {wait} seconds.") time.sleep(wait) elif response.status_code in (500, 502, 503, 504): if retries < MAX_RETRIES: wait = 2 ** retries logging.warning(f"Transient error {response.status_code} on /{endpoint}. Retrying in {wait} seconds...") time.sleep(wait) retries += 1 else: logging.error(f"Max retries reached on /{endpoint}. Aborting.") break else: logging.error(f"Unexpected error {response.status_code} on /{endpoint}: {response.text}") break except Exception as e: logging.exception(f"Exception occurred while calling /{endpoint}") break if data: df = pd.DataFrame(data) df = clean_illegal_chars(df) all_dataframes[endpoint] = df logging.info(f"Ingestion complete for /{endpoint}, {len(df)} records.") else: logging.info(f"No data retrieved from /{endpoint}.") # Run ingestion for all endpoints for endpoint in ENDPOINTS: fetch_data(endpoint) # Save to Excel with one sheet per endpoint if all_dataframes: output_file = Path("moveworks_export.xlsx") with pd.ExcelWriter(output_file, engine='openpyxl') as writer: for endpoint, df in all_dataframes.items(): sheet_name = endpoint[:31] # Excel sheet name max limit df.to_excel(writer, sheet_name=sheet_name, index=False) logging.info(f"All data written to {output_file}") else: logging.warning("No data to write to Excel.") ``` ![](https://files.readme.io/8969f6e0ab1b1521db60574883d6cd889e925df7a6c01ea27fc76ef86827ffd5-image-scriptview.png) ## Step - 2 : Modification of the excel file to generate the raw interactions view. Let’s learn the sequence of columns shown in raw interactions table and understand where this data is stored in Data API
Raw interactions table Definition Data API table
Timestamp This the created time for user interaction Interactions
Conversation ID Conversation ID attributed to an interaction stored Interactions
Conversation Domain This is detected domain on a conversation level Conversations
Conversation Topic All topics aggregated for interactions based on the conversation ID Interactions
Interaction Type Type of interaction Interactions
Interaction content User message or action information Interactions
Bot Response Summarized response provided by the AI Assistant Interactions
Unsuccessful Plugins Plugins called by the AI Assistant but were not served and not used Plugin calls
Plugin Served Plugins that the reasoner considered and served to the user Plugin calls
Plugin Used Plugins that the user interacted or engaged with Plugin calls
Resource Domain Resource domain configured during setting up the ingestion. Plugin resource
No of citations Total no of citations provided in the summarized response Plugin resource
Content Item name Name of the content item used to generate the summarized response Plugin resource
Content Item ID External resource ID of the content item Plugin resource
Ticket type Indicates who filed the ticket 1. user initiated ticket : ticket filed by the end user as an escalation 2. bot initiated ticket : ticket filed by the AI Assistant as a receipt Plugin resource
Ticket ID Unique identifier for a ticket Plugin resource
Interaction surface Platform where the interaction took place Interactions
User Department Department to which the end user belongs to *
User Location Location to which the end user belongs to *
User country Country to which the end user belongs to *
User Preferred language Default language configured in the AI Assistant Users
Now that we have identified the columns required to reconstruct the Raw Interactions view, let’s begin working through the raw data and progressively merge the **interactions** and **conversations** tables to form a unified view. Please note that we will be using Excel functions throughout this process. 1. **Bring in the base columns — Actor, Conversation ID, Created Time, Interaction ID, Interaction Type** a. From the _interactions_ sheet, extract the relevant details into a new sheet. We are only interested in user-led interactions. Use the following formula to filter this data: `=FILTER(interactions!A:D, interactions!A:A="user")` 2. **Using the Conversation ID, fetch Conversation Domain and Conversation Topic** a. To fetch the _Conversation Domain_, use the `VLOOKUP` function to pull data from the _conversations_ sheet: `=VLOOKUP(B4,conversations!B:E,3,FALSE)` b. To recreate the _Conversation Topic_, aggregate all topics detected at the interaction level and replicate them for each interaction. Use this formula: `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER(interactions!P:P, interactions!B:B= B2)))` 3. **Fetch additional content fields from the Interactions table** a. To extract _Interaction Content_, pull values from `detail_content`, `detail_detail`, or similar columns, and combine them based on data availability. Use the following nested `IF` structure: `=IF(INDEX(interactions!K:K, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!K:K, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!N:N, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!N:N, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!L:L, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!L:L, MATCH(D1294, interactions!D:D, 0)), IF(INDEX(interactions!Q:Q, MATCH(D1294, interactions!D:D, 0))<>"", INDEX(interactions!Q:Q, MATCH(D1294, interactions!D:D, 0)), ""))))` b. To retrieve the _Bot Response_ corresponding to each user interaction, use the parent interaction ID to index into bot-led interactions: `=TEXTJOIN(CHAR(10), TRUE, FILTER(interactions!K:K, interactions!G:G=D2))` There can be multiple bot interactions pointing to a single user interaction. Join these interactions to form a bot response similar to raw interactions table. 4. **Determine plugins involved in an interaction using the Plugin Calls table** Retrieve three plugin-related columns using the formulas below: a. _Unsuccessful plugin_: `=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =FALSE) *('plugin-calls'!I:I=FALSE)))` b. _Plugin served_: `=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =TRUE)))` c. _Plugin used_: `=TEXTJOIN(", ", TRUE,FILTER('plugin-calls'!F:F,('plugin-calls'!D:D=D10) *('plugin-calls'!H:H =TRUE) *('plugin-calls'!I:I=TRUE)))` 5. **Add plugin resource information** Use the _plugin-resources_ table to fetch citation and content-related metadata: a. _Resource Domain_: Indicates the configured domain for knowledge or ticket-based interactions. `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!K:K, 'plugin-resources'!E:E = D2)))` b. _Number of Citations_: Counts distinct resources cited per interaction. `=IFERROR(COUNTA(UNIQUE(FILTER('plugin-resources'!D:D, 'plugin-resources'!E:E = D2))), 0)` Note: Even when there's a #CALC! error, Excel may return 1. Use the Content Item ID column to clean up any incorrect entries. c. _Content Item ID_: `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!D:D, 'plugin-resources'!E:E = D2)))` Filter out rows returning `#CALC!`. For such rows, mark _No of Citations_ as 1 and _Content Item ID_ as "Nil". d. _Content Item Name_: `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!M:M, 'plugin-resources'!E:E = D2)))` 6. **Populate ticketing information (only applicable when a ticket is created)** a. _Ticket Type_: Indicates whether a ticket was user-initiated or bot-initiated. `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!S:S, 'plugin-resources'!E:E = D2)))` b. _Ticket ID_: Filter the values to include only rows where the resource type is "ticket". `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER('plugin-resources'!H:H,('plugin-resources'!E:E = D4)*('plugin-resources'!I:I="RESOURCE_TYPE_TICKET"))))` 7. **Fetch the platform where the interaction took place** This can be extracted from the _interactions_ sheet based on Interaction ID: `=TEXTJOIN(", ", TRUE, UNIQUE(FILTER(interactions!H:H, interactions!D:D = D2)))` 8. **Get the user’s preferred language from the Users table** Use this formula to fetch preferred language based on user ID: `=TEXTJOIN(", ",TRUE,UNIQUE(FILTER(users!J:J,users!F:F=F3)))` *** **Additional Notes:** Fields like _User Department_, _User Location_, and _User Country_ are not directly available from the Data API. These attributes must be sourced from your organization’s external identity systems. The **User API** provides an `external ID` which can be used to enrich user metadata by integrating with external identity providers such as Okta. You can refer to [Okta List Users](https://developer.okta.com/docs/api/openapi/okta-management/management/tag/User/#tag/User/operation/listUsers) for details. Once this data is pulled from the external system, join it using the external ID to populate the missing attributes. *** While the modifications shown here are demonstrated in Excel, we are also providing a Python script that can directly generate a raw interactions table based on the time input specified in the script To run a Python file, first install Python from [python.org](https://www.python.org/downloads/). On **Mac**, open Terminal, navigate to your file’s folder, and run `python3 script.py`. On **Windows**, open Command Prompt, go to the file’s folder, and run `python script.py`. ```python import requests import pandas as pd import time import logging from pathlib import Path import re from datetime import datetime, timedelta # Config BASE_API = 'https://api.moveworks.ai/export/v1beta2/records' ENDPOINTS = [ 'interactions', 'conversations', 'plugin-calls', 'plugin-resources', 'users' ] ACCESS_TOKEN = '' # Replace this # Time filter will be set dynamically based on user input # Setup logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') # Headers headers = { 'Authorization': f'Bearer {ACCESS_TOKEN}', } # Retry config MAX_RETRIES = 5 def get_user_date_inputs(): """Get start and end dates from user input and convert to timestamp format""" print("\n" + "="*50) print("RAW INTERACTIONS TABLE BUILDER") print("="*50) print("Enter the date range for data extraction:") print("Date format: YYYY-MM-DD (e.g., 2025-07-15)") print("-"*50) while True: try: start_date_str = input("Enter start date (YYYY-MM-DD): ").strip() start_date = datetime.strptime(start_date_str, "%Y-%m-%d") break except ValueError: print("❌ Invalid date format. Please use YYYY-MM-DD format (e.g., 2025-07-15)") while True: try: end_date_str = input("Enter end date (YYYY-MM-DD): ").strip() end_date = datetime.strptime(end_date_str, "%Y-%m-%d") if end_date < start_date: print("❌ End date cannot be earlier than start date. Please try again.") continue break except ValueError: print("❌ Invalid date format. Please use YYYY-MM-DD format (e.g., 2025-07-15)") # Convert to timestamp format for API start_timestamp = start_date.strftime("%Y-%m-%dT00:00:00.000Z") # For end date, set to end of day end_timestamp = (end_date + timedelta(days=1) - timedelta(microseconds=1)).strftime("%Y-%m-%dT23:59:59.999Z") time_filter = f"last_updated_time ge '{start_timestamp}' and last_updated_time le '{end_timestamp}'" print(f"\n✅ Date range set:") print(f" Start: {start_date_str} 00:00:00 UTC") print(f" End: {end_date_str} 23:59:59 UTC") print(f" Filter: {time_filter}") print("-"*50) return time_filter, start_date_str, end_date_str def clean_illegal_chars(df): """Clean illegal characters from DataFrame""" def remove_illegal_chars(value): if isinstance(value, str): return re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', value) return value return df.applymap(remove_illegal_chars) def flatten_record(record): """Flatten nested fields in record""" flat = record.copy() if isinstance(flat.get("detail"), dict): detail = flat.pop("detail", {}) or {} for key, value in detail.items(): if isinstance(value, list): value = ','.join(map(str, value)) flat[f'detail_{key}'] = value return flat def fetch_data(endpoint, time_filter): # Added time_filter parameter """Fetch data from a specific endpoint""" url = f'{BASE_API}/{endpoint}' params = { '$orderby': 'id desc', '$filter': time_filter, # Using the passed time_filter } data = [] retries = 0 page_count = 0 logging.info(f"Fetching data from /{endpoint}") while url: try: logging.info(f"Requesting page {page_count + 1}: {url}") response = requests.get(url, headers=headers, params=params if url.endswith(endpoint) else None) if response.status_code == 200: json_resp = response.json() page_data = json_resp.get('value', []) logging.info(f"Retrieved {len(page_data)} records from page {page_count + 1}") if not page_data: break for record in page_data: data.append(flatten_record(record)) url = json_resp.get('@odata.nextLink') retries = 0 page_count += 1 if endpoint == 'users': time.sleep(2) elif response.status_code == 429: wait = 90 if endpoint == 'users' else 60 logging.warning(f"Rate limited on /{endpoint}. Waiting for {wait} seconds.") time.sleep(wait) elif response.status_code in (500, 502, 503, 504): if retries < MAX_RETRIES: wait = 2 ** retries logging.warning(f"Transient error {response.status_code} on /{endpoint}. Retrying in {wait} seconds...") time.sleep(wait) retries += 1 else: logging.error(f"Max retries reached on /{endpoint}. Aborting.") break else: logging.error(f"Unexpected error {response.status_code} on /{endpoint}: {response.text}") break except Exception as e: logging.exception(f"Exception occurred while calling /{endpoint}") break if data: df = pd.DataFrame(data) df = clean_illegal_chars(df) logging.info(f"Ingestion complete for /{endpoint}, {len(df)} records.") return df else: logging.info(f"No data retrieved from /{endpoint}.") return pd.DataFrame() def get_conversation_topics(interactions_df, conversation_id): """Get all entities for a conversation ID""" entities = interactions_df[interactions_df['conversation_id'] == conversation_id]['detail_entity'].dropna().unique() return ', '.join([str(e) for e in entities if str(e) != 'nan']) def get_interaction_content(row, interactions_df): """Extract interaction content from detail fields""" interaction_id = row['id'] interaction_row = interactions_df[interactions_df['id'] == interaction_id] if interaction_row.empty: return "" interaction_row = interaction_row.iloc[0] # Based on API example, only use fields that actually exist # From the example: detail.content is the main field for user message content if 'detail_content' in interaction_row and pd.notna(interaction_row['detail_content']) and str(interaction_row['detail_content']) != '': return str(interaction_row['detail_content']) return "" def get_bot_response(row, interactions_df): """Get bot response using parent interaction ID""" interaction_id = row['id'] # Find bot interactions with this interaction as parent bot_response = interactions_df[ (interactions_df['parent_interaction_id'] == interaction_id) & (interactions_df['actor'] == 'bot') ] if not bot_response.empty: bot_row = bot_response.iloc[0] # Only use the content field that actually exists in the API if 'detail_content' in bot_row and pd.notna(bot_row['detail_content']) and str(bot_row['detail_content']) != '': return str(bot_row['detail_content']) return "" def get_plugin_info(interaction_id, plugin_calls_df, plugin_type): """Get plugin information based on type (unsuccessful, served, used)""" if plugin_calls_df.empty: return "" interaction_plugins = plugin_calls_df[plugin_calls_df['interaction_id'] == interaction_id] if interaction_plugins.empty: return "" if plugin_type == 'unsuccessful': # Plugins that were not served and not used plugins = interaction_plugins[ (interaction_plugins['served'] == False) & (interaction_plugins['used'] == False) ]['plugin_name'].unique() elif plugin_type == 'served': # Plugins that were served plugins = interaction_plugins[ interaction_plugins['served'] == True ]['plugin_name'].unique() elif plugin_type == 'used': # Plugins that were served and used plugins = interaction_plugins[ (interaction_plugins['served'] == True) & (interaction_plugins['used'] == True) ]['plugin_name'].unique() else: return "" return ', '.join([str(p) for p in plugins if str(p) != 'nan']) def get_resource_info(interaction_id, plugin_resources_df, info_type): """Get resource information (domain, citations, content items, tickets)""" if plugin_resources_df.empty: return "" if info_type != 'citation_count' else 0 interaction_resources = plugin_resources_df[plugin_resources_df['interaction_id'] == interaction_id] if interaction_resources.empty: return "" if info_type != 'citation_count' else 0 if info_type == 'domain': domains = interaction_resources['detail_domain'].dropna().unique() return ', '.join([str(d) for d in domains if str(d) != 'nan']) elif info_type == 'citation_count': unique_resources = interaction_resources['resource_id'].dropna().nunique() return unique_resources elif info_type == 'content_item_id': # Use external_resource_id from detail field content_ids = interaction_resources['detail_external_resource_id'].dropna().unique() return ', '.join([str(c) for c in content_ids if str(c) != 'nan']) elif info_type == 'content_item_name': content_names = interaction_resources['detail_name'].dropna().unique() return ', '.join([str(c) for c in content_names if str(c) != 'nan']) elif info_type == 'ticket_type': # Need to determine ticket type logic based on your business rules # This might need adjustment based on how you determine user vs bot initiated ticket_resources = interaction_resources[ticket_resources['type'] == 'RESOURCE_TYPE_TICKET'] if not ticket_resources.empty: return "user initiated ticket" # Default, adjust logic as needed return "" elif info_type == 'ticket_id': ticket_resources = interaction_resources[interaction_resources['type'] == 'RESOURCE_TYPE_TICKET'] ticket_ids = ticket_resources['detail_external_resource_id'].dropna().unique() return ', '.join([str(t) for t in ticket_ids if str(t) != 'nan']) return "" def get_interaction_surface(interaction_id, interactions_df): """Get the platform/surface where interaction took place""" interaction_row = interactions_df[interactions_df['id'] == interaction_id] if not interaction_row.empty: # Use 'platform' field from the API response surface = interaction_row.iloc[0].get('platform', '') return str(surface) if pd.notna(surface) else "" return "" def get_user_language(user_id, users_df): """Get user's preferred language""" if users_df.empty: return "" # Use the actual field name from API response user_row = users_df[users_df['id'] == user_id] if not user_row.empty: language = user_row.iloc[0].get('user_preferred_language', '') return str(language) if pd.notna(language) else "" return "" def build_raw_interactions_table(all_dataframes): """Build the final raw interactions table""" logging.info("Building raw interactions table...") # Extract dataframes interactions_df = all_dataframes.get('interactions', pd.DataFrame()) conversations_df = all_dataframes.get('conversations', pd.DataFrame()) plugin_calls_df = all_dataframes.get('plugin-calls', pd.DataFrame()) plugin_resources_df = all_dataframes.get('plugin-resources', pd.DataFrame()) users_df = all_dataframes.get('users', pd.DataFrame()) if interactions_df.empty: logging.error("No interactions data available") return pd.DataFrame() # Filter for user-led interactions only user_interactions = interactions_df[interactions_df['actor'] == 'user'].copy() if user_interactions.empty: logging.warning("No user interactions found") return pd.DataFrame() logging.info(f"Processing {len(user_interactions)} user interactions...") # Initialize the raw interactions table raw_interactions = pd.DataFrame() # Base columns from interactions raw_interactions['Timestamp'] = user_interactions['created_time'] raw_interactions['Conversation ID'] = user_interactions['conversation_id'] raw_interactions['Interaction ID'] = user_interactions['id'] raw_interactions['Interaction Type'] = user_interactions['type'] # Changed from 'interaction_type' # Get conversation domain from conversations table if not conversations_df.empty: conversation_domain_map = conversations_df.set_index('id')['primary_domain'].to_dict() # Changed from 'domain' raw_interactions['Conversation Domain'] = raw_interactions['Conversation ID'].map(conversation_domain_map) else: raw_interactions['Conversation Domain'] = "" # Get conversation topics (aggregated for each conversation) logging.info("Processing conversation entities...") raw_interactions['Conversation Topic'] = raw_interactions['Conversation ID'].apply( lambda x: get_conversation_topics(interactions_df, x) ) # Get interaction content and bot response logging.info("Processing interaction content and bot responses...") raw_interactions['Interaction Content'] = user_interactions.apply( lambda row: get_interaction_content(row, interactions_df), axis=1 ) raw_interactions['Bot Response'] = user_interactions.apply( lambda row: get_bot_response(row, interactions_df), axis=1 ) # Get plugin information logging.info("Processing plugin information...") raw_interactions['Unsuccessful Plugins'] = raw_interactions['Interaction ID'].apply( lambda x: get_plugin_info(x, plugin_calls_df, 'unsuccessful') ) raw_interactions['Plugin Served'] = raw_interactions['Interaction ID'].apply( lambda x: get_plugin_info(x, plugin_calls_df, 'served') ) raw_interactions['Plugin Used'] = raw_interactions['Interaction ID'].apply( lambda x: get_plugin_info(x, plugin_calls_df, 'used') ) # Get resource information logging.info("Processing resource information...") raw_interactions['Resource Domain'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'domain') ) raw_interactions['No of Citations'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'citation_count') ) raw_interactions['Content Item Name'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'content_item_name') ) raw_interactions['Content Item ID'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'content_item_id') ) raw_interactions['Ticket Type'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'ticket_type') ) raw_interactions['Ticket ID'] = raw_interactions['Interaction ID'].apply( lambda x: get_resource_info(x, plugin_resources_df, 'ticket_id') ) # Get interaction surface raw_interactions['Interaction Surface'] = raw_interactions['Interaction ID'].apply( lambda x: get_interaction_surface(x, interactions_df) ) # Get user information if 'user_id' in user_interactions.columns: logging.info("Processing user information...") raw_interactions['User Preferred Language'] = user_interactions['user_id'].apply( lambda x: get_user_language(x, users_df) ) else: raw_interactions['User Preferred Language'] = "" # Add placeholder columns for external user data raw_interactions['User Department'] = "" # To be populated from external identity system raw_interactions['User Location'] = "" # To be populated from external identity system raw_interactions['User Country'] = "" # To be populated from external identity system logging.info(f"Raw interactions table built with {len(raw_interactions)} rows and {len(raw_interactions.columns)} columns") return raw_interactions def main(): """Main function to orchestrate the entire process""" # Get user input for date range time_filter, start_date_str, end_date_str = get_user_date_inputs() logging.info("Starting raw interactions table builder...") # Step 1: Fetch all data all_dataframes = {} for endpoint in ENDPOINTS: df = fetch_data(endpoint, time_filter) if not df.empty: all_dataframes[endpoint] = df if not all_dataframes: logging.error("No data retrieved from any endpoint") return # Step 2: Build raw interactions table raw_interactions_table = build_raw_interactions_table(all_dataframes) if raw_interactions_table.empty: logging.error("Failed to build raw interactions table") return # Step 3: Save results timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") # Save raw data to Excel (optional) raw_data_file = Path(f"moveworks_raw_data_{start_date_str}_to_{end_date_str}_{timestamp}.xlsx") with pd.ExcelWriter(raw_data_file, engine='openynxl') as writer: for endpoint, df in all_dataframes.items(): sheet_name = endpoint[:31] # Excel sheet name max limit df.to_excel(writer, sheet_name=sheet_name, index=False) logging.info(f"Raw data saved to {raw_data_file}") # Save final raw interactions table final_file = Path(f"raw_interactions_table_{start_date_str}_to_{end_date_str}_{timestamp}.xlsx") raw_interactions_table.to_excel(final_file, index=False, sheet_name='Raw Interactions') logging.info(f"Final raw interactions table saved to {final_file}") # Also save as CSV for easier processing csv_file = Path(f"raw_interactions_table_{start_date_str}_to_{end_date_str}_{timestamp}.csv") raw_interactions_table.to_csv(csv_file, index=False) logging.info(f"Final raw interactions table saved to {csv_file}") # Print summary print("\n" + "="*50) print("RAW INTERACTIONS TABLE SUMMARY") print("="*50) print(f"Total interactions processed: {len(raw_interactions_table)}") print(f"Date range: {start_date_str} to {end_date_str}") print(f"Time filter: {time_filter}") print(f"Columns: {list(raw_interactions_table.columns)}") print(f"Files generated:") print(f" - {raw_data_file} (raw data)") print(f" - {final_file} (final table - Excel)") print(f" - {csv_file} (final table - CSV)") # Show sample data if len(raw_interactions_table) > 0: print(f"\nSample data (first 3 rows):") print(raw_interactions_table.head(3).to_string()) if __name__ == "__main__": main() ```