% Function ParseSEMAINEAnnotations is intended to demonstrate example usage
% of SEMAINE Action Unit annotations made with ELAN annotation toolbox.
%
% This function loads the XML structure from an ELAN annotation file with
% ".eaf" extension, parses it and returns a numerical matrix called
% "activations" of size (NUMBER OF FRAMES + 1) X (NUMBER OF ACTION UNITS + 1).
% The matrix holds binary activation status for each frame / AU combination.
% The first row is a header showing which AU corresponds to which column
% (its first element is 0), and the first column holds the original frame
% indexes. If the file cannot be loaded an empty matrix is returned.
%
% The function takes 1 compulsory and 2 optional arguments:
% - "filepath" (compulsory) - complete path to an annotation file to parse.
%   For example, "/matlab/annotation.eaf" or "C:\matlab\annotation.eaf" on
%   Windows.
% - "startFrame" (optional) - ignore all annotations before "startFrame".
%   Default is 1. Passing [] selects the default.
% - "endFrame" (optional) - ignore all annotations after "endFrame".
%   Default is the frame of the last annotation time slot. Passing []
%   selects the default.
%
% Annotations that only partly overlap [startFrame, endFrame] are clipped to
% that window rather than discarded.
%
% The function requires XML IO Toolbox
% (http://www.mathworks.com/matlabcentral/fileexchange/12907-xml-io-tools)
% to run properly (supplied).
function activations = ParseSEMAINEAnnotations (filepath, startFrame, endFrame)
    activations = [];

    % Framerate value used to convert ELAN millisecond time slots to more
    % usual frames. 50 is a valid framerate for all SEMAINE videos.
    framerate = 50;

    % A fixed set of 6 Action Units selected for the challenge from the
    % SEMAINE annotations
    aus = [2 12 17 25 28 45];

    % Total number of AUs.
    naus = length(aus);

    % Load XML structure from the file, return in case of a problem.
    [success, XML] = OpenXML(filepath);
    if ~success
        return
    end

    % Parse annotation time slots
    tslots = ParseTimeSlots(XML);

    % Init start and end frames with default values
    if nargin < 2 || isempty(startFrame)
        startFrame = 1;
    end
    if nargin < 3 || isempty(endFrame)
        % Convert last time slot value in ms to frames
        endFrame = floor((LastTimeSlotValue(tslots) / 1000) * framerate);
    end

    % Get total number of tiers. There are 65 of them, 1 for speech, 32 for
    % activations (1 per AU) and 32 for intensities. We are going to ignore
    % intensity tiers.
    ntiers = length(XML.TIER);

    % Compose vector of frame indexes to extract annotations from
    frames = (startFrame:endFrame);

    % Preallocate activations matrix
    activations = zeros(length(frames), naus);

    % Tier IDs of the selected AUs, in the same order as the output columns
    auLabels = arrayfun(@(a) sprintf('AU%d', a), aus, 'UniformOutput', false);
    found = false(1, naus);

    % Go through all tiers skipping the first one (speech) as well as every
    % intensity tier. A single activation tier is processed at every
    % iteration. Each tier is matched against every selected AU, so the
    % result does not depend on the order of tiers in the file.
    for k = 2:2:ntiers
        tier = XML.TIER(k);

        % Only extract annotations of selected AUs, skip the rest
        col = find(strcmp(tier.ATTRIBUTE.TIER_ID, auLabels), 1);
        if isempty(col) || found(col)
            continue
        end

        % Read all activation periods from the current tier
        activationTier = ParseActivationTier(tier, tslots);

        % Convert all activation periods into frame level numerical
        % representation
        activations(:, col) = ParseOccurrences(activationTier, frames, framerate);
        found(col) = true;

        if all(found)
            break
        end
    end

    % Columns of AUs without a matching tier stay all-zero; make that visible
    if ~all(found)
        warning('ParseSEMAINEAnnotations:missingTier', ...
            'No activation tier found for AU(s): %s', ...
            strtrim(sprintf('%d ', aus(~found))));
    end

    % Prepend frame index column and AU header row
    activations = [frames(:) activations];
    activations = [[0 aus]; activations];
end

% Returns the value (in ms) used to derive the default end frame: the value
% of slot "ts<N>" (N = number of slots) when that key exists and is aligned,
% otherwise the largest time value present, or 0 if there is none.
function lastValue = LastTimeSlotValue (tslots)
    lastValue = 0;

    % Get total number of time slots
    ntslots = length(tslots);
    if ntslots == 0
        return
    end

    % Get last slot ID
    lastID = strcat('ts', num2str(ntslots));

    if isKey(tslots, lastID) && ~isnan(tslots(lastID))
        % Get last time slot value in ms
        lastValue = tslots(lastID);
    else
        % IDs are not the contiguous ts1..tsN sequence (or the last slot is
        % unaligned): fall back to the largest known time value
        candidate = max(cell2mat(values(tslots)));
        if ~isempty(candidate) && ~isnan(candidate)
            lastValue = candidate;
        end
    end
end

% Converts activation periods (struct array with fields "startMs" and
% "stopMs", both in ms) into a column vector with one element per entry of
% "frames": 1 where the frame lies inside any period, 0 elsewhere.
function occurrences = ParseOccurrences (activations, frames, framerate)
    % Preallocate activations vector
    occurrences = zeros(length(frames), 1);

    % Go through all activation periods, convert ms into frames and init
    % corresponding values of activations vector with 1 leaving the rest be 0
    for i = 1:length(activations)
        % Convert ms into frames
        sframe = floor((activations(i).startMs / 1000) * framerate);
        eframe = floor((activations(i).stopMs / 1000) * framerate);

        % Mark every requested frame inside [sframe, eframe] with 1. A range
        % mask (instead of exact matches on both end points) keeps periods
        % that start before the first or end after the last requested frame.
        occurrences(frames >= sframe & frames <= eframe) = 1;
    end
end

% Reads all activation periods of a single tier and returns them as a struct
% array with fields "startMs" and "stopMs" holding the time values (in ms)
% looked up in "tslots" by time slot ID.
function activationTier = ParseActivationTier (tier, tslots)
    period = struct('startMs', 0, 'stopMs', 0);

    % A tier without any annotation yields an empty result
    if ~isfield(tier, 'ANNOTATION') || isempty(tier.ANNOTATION)
        activationTier = repmat(period, 0, 1);
        return
    end

    % Get total number of activation periods
    nactivations = length(tier.ANNOTATION);

    % Preallocate activation tier structure holding start and end time
    % stamps of all activation periods for the given AU
    activationTier = repmat(period, nactivations, 1);

    % Go through all activation periods and init activation tier
    % structure array
    for i = 1:nactivations
        attrs = tier.ANNOTATION(i).ALIGNABLE_ANNOTATION.ATTRIBUTE;

        % Read time in ms corresponding to the start time slot ID
        activationTier(i).startMs = tslots(attrs.TIME_SLOT_REF1);

        % Read time in ms corresponding to the end time slot ID
        activationTier(i).stopMs = tslots(attrs.TIME_SLOT_REF2);
    end
end

% Builds a containers.Map from time slot ID to time value (in ms) out of the
% TIME_ORDER section of the loaded XML structure. Slots without a time value
% are stored as NaN.
function tslots = ParseTimeSlots (xmlObject)
    slots = xmlObject.TIME_ORDER.TIME_SLOT;

    % Get total number of time slots
    nslots = length(slots);

    % The bulk constructor below needs at least one key
    if nslots == 0
        tslots = containers.Map('KeyType', 'char', 'ValueType', 'double');
        return
    end

    % Preallocate cell arrays of time slot IDs and values
    tids = cell(nslots, 1);
    tvalues = nan(nslots, 1);

    % Read all time slot IDs and numerical values (in ms)
    for i = 1:nslots
        attrs = slots(i).ATTRIBUTE;
        tids{i} = attrs.TIME_SLOT_ID;

        % Assigning an empty value would delete the element, so slots
        % without a time value keep the NaN placeholder
        if isfield(attrs, 'TIME_VALUE') && ~isempty(attrs.TIME_VALUE)
            value = attrs.TIME_VALUE;
            if ischar(value)
                value = str2double(value);
            end
            tvalues(i) = value;
        end
    end

    % Map time slot IDs and values together so that values are accessible
    % by their IDs
    tslots = containers.Map(tids, tvalues);
end

% Loads the XML structure stored at "xmlPath" using xml_read. Returns
% success = true and the loaded structure, or success = false and [] when
% the path is not an existing file or nothing could be read. Progress and
% error messages are printed to the command window.
function [success, xmlObject] = OpenXML (xmlPath)
    fprintf(' *** Attempting to load "%s" ... ', xmlPath);

    xmlObject = [];
    success = false;

    % Check if the specified file exists and return error otherwise.
    % exist() returns 2 for files and 7 for folders; only a file is usable.
    if exist(xmlPath, 'file') ~= 2
        fprintf(' ERROR - specified path does not exist *** \n');
        return
    end

    % Load XML structure
    xmlObject = xml_read(xmlPath);

    % Check if XML object loaded correctly, return error otherwise
    if isempty(xmlObject)
        fprintf(' ERROR - unable to read xml tree *** \n');
        return
    end

    success = true;
    fprintf(' Done *** \n');
end