Google Sheets – How to Extract Time from Text Using Regex

google sheetsregex

I hope someone can help me with my struggle. I need to get the time slots after each "Text_Text" from this one sentence that is in a single cell:

Sth 03/30/2020 1:15 PM-03/30/2020 1:30 PM;"Text_Text" 03/30/2020 1:30 PM-03/30/2020 3:30 PM;"Sth" 03/30/2020 3:30 PM-03/30/2020 4:00 PM;"Text_Text" 03/30/2020 4:00 PM-03/30/2020 4:30 PM;"Sthh" 03/30/2020 6:00 PM-03/30/2020 6:15 PM;

Desired outcome:
start time and end time in different cells: 1:30 PM 3:30 PM 4:00 PM 4:30 PM

The pattern and length of the sentence are not consistent. But time always comes after "Text_Text" and the date.
I am working in Google Sheets . I know that this could be done with regex but it's too confusing for me and I can't get the syntax right. So far i was only able to get only 1 slot with split function but this is far way from what I need.

Best Answer

Your have data in a single Column spread over many rows. Each cell contains a string comprising multiples of records that follow a given pattern. The pattern is:

  • identifier
  • date#1
  • time#1
  • date#2
  • time#2
  • semicolon

For example: Sth 03/30/2020 1:15 PM-03/30/2020 1:30 PM;.

Each cell value contains many contiguous records, but there is no consistency for the number of records that comprise a cell value.

One of the identifiers is called "Text_text".

You wish to recover the time values from each "Text_text" record in every cell, in every row of data.

There may be many ways to achieve this outcome, including built-in functions. The following answer is a script solution. The solution is not as efficient as it might be since the times are written row-by-row rather than at the end of processing. However, some further user development would enable this barrier to be overcome.

The key aspects of the script are:

  • var count = values[i][0].split(searchTerm).length-1;: this counts the number of instances of the search term ("Text_text") that occur in a cell.
  • var rowtimes=[];: a temporary array is created to store the relevant time values as they are extracted from the cell value. Later this array can be updated to the spreadsheet.
  • var searchcounter = 0;: a counter to enable progressive searches for the search term within a given cell value.
  • use of javascript indexOf to search for the search term, the dash and the semi-colon.
    • var indexOfn = val.indexOf(searchTerm,searchcounter);: returns the index number of the search term. Note that the the starting number for the search is set by the searchcounter.
    • var indexofdash = val.indexOf(searchTermD, indexOfn);: searches for the first dash that follows the index number of the search term
    • var indexofsc = val.indexOf(searchTermSC, indexOfn);: searches for the first semicolon that follows the index number of the search term
  • if(indexofdash-indexOfn == 29){ and if(indexofsc-indexofdash == 19){: methods of determining whether the hour is between "1" and "9", or between "10" and "12"
  • use of the Javascript substring to extract the hour value from the cell; the calculation is simplified by referencing the search index number as the end index, and the same value less "7" or "8" as appropriate, as the start index.
    • var timestr1 = val.substring(indexofdash-7, indexofdash);
    • var timestr1 = val.substring(indexofdash-8, indexofdash);
    • var timestr2 = val.substring(indexofsc-7, indexofsc);
    • var timestr2 = val.substring(indexofsc-8, indexofsc);
  • sheet.getRange(+i+1,2,1,+count*2).setValues([rowtimes]);: updating the hour values to the spreadsheet by row

function wa140898() {

  // setup the spreadsheet
  var ss = SpreadsheetApp.getActiveSpreadsheet();
  var sheetname = "Sheet4";
  var sheet = ss.getSheetByName(sheetname);

  // get last row of contents in Column A
  var Avals = sheet.getRange("A1:A").getValues();
  var Alast = Avals.filter(String).length;

  // get the data
  var range = sheet.getRange(1,1,Alast);
  var values = range.getValues();

  // create search terms
  var searchTerm = "Text_Text";
  var searchTermD = "-"; // the first time preceeds a dash
  var searchTermSC = ";"; // the second time preceeds a semicolon

  // loop through the cells of data
  for (var i=0;i<Alast;i++){


    var searchcounter = 0; // set counter to zero

    var val = values[i][0]; // get the cell value
    var count =  values[i][0].split(searchTerm).length-1; // count the number of "Text_Text" vales in the cell
    var rowtimes=[]; // setup an temporary array for the times in each row.

    // loop though the value
    for (n = 0;n<count;n++){

      // get the index values for the search attaributes
      var indexOfn = val.indexOf(searchTerm,searchcounter);
      var indexofdash = val.indexOf(searchTermD, indexOfn);
      var indexofsc = val.indexOf(searchTermSC, indexOfn);

      //      searchcounter = searchcounter+indexOfn+1;
      searchcounter = +indexOfn+1;

      //Logger.log("n:"+n+", next indexof = "+indexOfn+", next dash = "+indexofdash+", next semicolon = "+indexofsc)

      /*
      Calculate the difference between the index for "Text_text" and the index for the following dash
      if the difference is 29 characters, then the hour value is between 1 and 9. The total time is 7 characters (e.g. "1:30 PM")
      if the difference is not 29 (it is 30), then the hour value is between 10 and 12. The total time is 8 characters (e.g. "11:30 PM")
      */
      if(indexofdash-indexOfn == 29){
         var timestr1 = val.substring(indexofdash-7, indexofdash);
         //Logger.log("time#1= "+timestr1)
         rowtimes.push(timestr1); // push the value to the temporary array
      } else{
        var timestr1 = val.substring(indexofdash-8, indexofdash);
        //Logger.log("time#1= "+timestr1)
        rowtimes.push(timestr1); // push the value to the temporary array
      }

      /*
      Calculate the difference between the index for the dash and the index for the following semi-colon.
      if the difference is 19 characters, then the hour value is between 1 and 9. The total time is 7 characters (e.g. "1:30 PM")
      if the difference is not 19 (it is 20), then the hour value is between 10 and 12. The total time is 8 characters (e.g. "11:30 PM")
      */
      if(indexofsc-indexofdash == 19){        
         var timestr2 = val.substring(indexofsc-7, indexofsc);
         //Logger.log("time#2= "+timestr2)
         rowtimes.push(timestr2);// push the value to the temporary array
      }
      else{
        var timestr2 = val.substring(indexofsc-8, indexofsc);
        //Logger.log("time#2= "+timestr2)
        rowtimes.push(timestr2);// push the value to the temporary array
      }

    } // end of loop for this value

    // update the hours output
    sheet.getRange(+i+1,2,1,+count*2).setValues([rowtimes]);
  }

} // end function

Sample data

Sample data