How to remove duplicates from an array in google sheets by comparing columns - google-apps-script

Hi I have a Sheet where by I am adding data to it by appending it to the bottom of the proceeding of the earlier data.
So the data has two columns I want to Compare (A and Y) A will have duplicates in and Y is a timestamp as a number.
So I want to write a script to remove duplicate rows where there are multiple A records and where the highest timestamp exists ..
can anyone help?

Removing Duplicates in Column A with the highest datevalues in Column Y
I tested this on a pretty limited data set so there could still be bugs in it. Check it out thoroughly before using it on important data.
//Used to build my datasheet with values in Column A and dates in column Y
function setupMyDuplicates(){
var dt=new Date();
var day=24*60*60*1000;
var ss=SpreadsheetApp.getActive();
var sh=ss.getActiveSheet();
var rg1=sh.getRange(1,1,20,1);
var vA1=rg1.getValues();
var rg2=sh.getRange(1,25,20,1);
var vA2=rg2.getValues();
for(var i=0;i<vA1.length;i+=2){
vA1[i][0]=(i%10)+1;
vA2[i][0]=Utilities.formatDate(new Date(dt.valueOf()+i*day), Session.getScriptTimeZone(), "MMM dd, yyy");
vA1[i+1][0]=(i%10)+2;
vA2[i+1][0]=Utilities.formatDate(new Date(dt.valueOf()+((i+1) * day)), Session.getScriptTimeZone(), "MMM dd, yyy");
}
rg1.setValues(vA1);
rg2.setValues(vA2);
}
//This function is used to help create my object array
function uniqueObj(v,dv,r){
return {value:v,datevalue:dv,row:r};
}
//This function is kind of like Array.indexOf() except for the array of my objects
function indexOfUniqueArray(v,uA){
var vA=[];
for(var i=0;i<uA.length;i++){
vA.push(uA[i].value);
}
return vA.indexOf(v);
}
function removeMyDuplicates(){
var ss=SpreadsheetApp.getActive();
var sh=ss.getActiveSheet();
var rg=sh.getDataRange();
var vA=rg.getValues();
var rA=[];
var uA=[];
for(var i=0;i<vA.length;i++){
var idx=indexOfUniqueArray(vA[i][0],uA);
if(idx>-1){//idx is greater than -1 then it's a duplicate
var dv=new Date(vA[i][24]).valueOf();
if(dv>uA[idx].datevalue){//if current value is greater that the uA[idx].value then change the datevalue and the row number and the value the same.
uA[idx].datevalue=dv;
uA[idx].row=i;
}
}else{//If idx == -1 then it's unique and so I add it to uA
uA.push(uniqueObj(vA[i][0],new Date(vA[i][24]).valueOf(),i));
}
}
for(var i=0;i<uA.length;i++){
rA.push(uA[i].row);//building the array of rows that have the largest datetime value in column Y (ie the rows you want to keep)
}
for(var i=vA.length-1;i>-1;i--){
if(rA.indexOf(i)==-1){
sh.deleteRow(i+1);//deleting unwanted rows
}
}
}

Related

google sheet custom function to multiply columns

I am working on a google app script to fetch a range, multiply one column in the range by another column in the range, then output to a different column within the original range. This should be simple, but I can't figure it out.
function myFunction() {
var sheet = SpreadsheetApp.getActive().getSheetByName("Sheet1");
var data = sheet.getRange("A1:B22").getValues();
var newData = []
for (i in data){
var row = data[i];
var multiply = row[0] * row[1]
newData.push(multiply)
}
sheet.getRange(1,3,22,1).setValues(newData)
}
Thanks for the recommendation, but that didn't work unfortunately. The reason I'm not using the arrayformula function is because this is actually a small piece of a larger, more complicated function.
What did end up working is pushing the "multiply" variable into newData as an array by putting it in brackets.
function myFunction() {
var sheet = SpreadsheetApp.getActive().getSheetByName("Sheet1");
var data = sheet.getRange("A1:B22").getValues();
var newData = []
for (i in data){
var row = data[i];
var multiply = row[0] * row[1]
newData.push([multiply])
Logger.log(newData)
}
sheet.getRange(1,3,22,1).setValues(newData)
}

Find if value exists in column

How can I extend my function to check if the new email already exists in column F?
function emailUpdate() {
var ss = SpreadsheetApp.getActiveSheet();
var emailRow = ss.getRange("F2:F").getValues();
var emailRowNum = emailRow.filter(String).length + 1;
var emailNew = ss.getRange("F"+emailRowNum).getValues();
return emailNew;}
So I have a googlesheet with emails saved in column F. I am trying to compare the last row value (emailNew) with all previous values in column F but I'm unsure how to do that ...
Comparing the last value in a range with all of the previous values.
function comparelastvalue()
{
var ss=SpreadsheetApp.getActive();
var sh=ss.getActiveSheet();
var lastRow=sh.getLastRow();
var rg=sh.getRange(2, 6, lastRow, 1);
var vA=rg.getValues();
var theseRowsEqualTheValueInTheLastRowA=[];
for(var i=0;i<lastRow-2;i++)
{
if(vA[i][0]==vA[lastRow-1][0])
{
theseRowsEqualTheValueInTheLastRowA.push(i+2);
}
}
}

Google Apps Script: Get specific data and preview elsewhere

I have a sheet of a long (sorted) list, with column A with date and column B with time and then some data on columns c,d,e,f.
I want to have a script function that compares current date-time to the date and time from columns A & B and then copies to columns i,j,k,l,m,n on rows 1-4 the "upcoming" data.
So I'm thinking of something like this, but unsure how to finish it. Any ideas?
function myFunction()
{
var d = new Date();
var now = d.getTime();
var as = SpreadsheetApp.getActiveSheet();
var data = as.getDataRange().getValues(); //this seems to be wrong, because I don't need all DataRange, only first 6 columns, but as many rows as those having data
var c = 0;
for(i in data)
{
var row = data[i];
//var rowdate = ??;
if (now < rowdate)
{
c++;
//copy from row to destination location
}
if (c > 4) break;
}
}

How to use for to sum a range in google app script?

I want create a simple custom sum formula in google Spreadsheets using google app Script.
function somaDias(range, days) {
var sum = 0;
for(i=0; i<days; i++){
sum = sum + range[i];
}
return soma;
}
I will use this function in a report that has a column with monetary values for each day of the month. The idea is to select the whole column of values, pass it as a range and them sum this values up to a day of the month. I know there is something wrong with
sum = sum + range[i];
because, when I test it with a column that has only 1... the return is "0111111"
=somaDias(C9:C15;6) // should return 6, but it returns 0111111
I don´t want to use =sum(), because the range changes according to the day. I want to pass as the range the 30 days of the month and then pass the number of days I want to sum. If I used sum, I would have to reselect the range everyday....
What I´m doing wrong?
Why not using the spreadsheet built in function SUM() ? (doc here)
edit following comment.
I don't like custom functions but here is a way to achieve what you want : use only 1 parameter = number of days, it will count from the first column.
function somaDias(days) {
var sum = 0;
var sh = SpreadsheetApp.getActiveSheet();
var values = sh.getRange(sh.getActiveCell().getRowIndex(),1,1,days).getValues();
for(var i in values[0]){
sum += values[0][i];
}
return sum;
}
If you loop through the values you can use the function below. You can specify the object and what column index needs to be summed.
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getActiveSheet();
var range = sheet.getRange("A1:F30");
var values = range.getValues();
function sum( obj, colindex ) {
var sum = 0;
for( var el in obj ) {
sum += parseFloat( obj[el][colindex] );
}
return sum;
}

Google Script to remove duplicate rows based on 2 columns criteria

I am using a script in the that pulls events details from a calendar and adds them into column A and B in a spreadsheet, remove any duplicate events and then sort based on date. My hope is then that I can have staff add additional data about these events in columns C,D, etc.
This seems to be working fine, but once information is added in columns C,D the scripts de-duplication function stops working because it is comparing the full row and not simply the content being imported.
Is there a method to adjust the below de-duplication script to only check columns A and B when deciding if rows are duplicate and should be removed?
I have attempted to use the code (currently commented out) adjustment below as found in the variation section in this article: https://developers.google.com/apps-script/articles/removing_duplicates - but it still does not seem to work.
Thank you for any assistance
The Script:
enter code here
//this section retrieves the information from a calendar from a user submitted date until the end of the year
function importEvents(){
var calID = Browser.inputBox("Please enter your google Cal ID", Browser.Buttons.OK_CANCEL);
var startdate = Browser.inputBox("Start date using 1/1/2013 format", Browser.Buttons.OK_CANCEL);
var cal = CalendarApp.getCalendarById(calID);
var events_sheet = SpreadsheetApp.getActiveSheet();
var events = cal.getEvents(new Date(startdate), new Date("1/1/2014"));
var lr = events_sheet.getLastRow();
var eventarray = new Array();
var i = 0; // edited
for (i = 0; i < events.length; i++) {
line = new Array();
line.push(events[i].getStartTime());
line.push(events[i].getTitle());
//Potential more data that I am not getting at this time
// line.push(events[i].getDescription());
// line.push(events[i].getEndTime());
eventarray.push(line);
}
events_sheet.getRange("A"+(lr+1)+":B"+(lr+i)).setValues(eventarray);
//sort ascending dy date
var range = SpreadsheetApp.getActiveSheet().getRange("A3:F2000");
range.sort([{column: 1, ascending: true}]);
//removes duplicate rows
var sheet = SpreadsheetApp.getActiveSheet();
var data = sheet.getDataRange().getValues();
var newData = new Array();
for(i in data){
var row = data[i];
var duplicate = false;
for(j in newData){
if(row.join() == newData[j].join()){
duplicate = true;
//This was supposed to only check the first 2 columns, but it doesn't work
//I found this code in the variation section of this tutorial: https://developers.google.com/apps-script/articles/removing_duplicates
//
// if(row[0] == newData[j][0] && row[1] == newData[j][1]){
// duplicate = true;
}
}
if(!duplicate){
newData.push(row);
}
}
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
That commented out block can be made to work, with some tweaking. A problem arises because of the nature of your data, and the fact that the tutorial didn't take into account the way that object comparisons work. (Refer to JavaScript Date Object Comparison.)
Your first column consists of Date objects, which the == comparator will evaluate true only when both sides of the comparison are the same object. In the whole-row comparison, the dates are being coerced to strings by the .join() operation. We can get the same effect on a cell-by-cell basis, like this:
if(row[0].toString() == newData[j][0].toString() && row[1] == newData[j][1]){
duplicate = true;
}
Alternatively, we can restrict the comparison to the first two columns by using the Array.slice() operation. This way, we don't need to know what types are being compared, because we'll still use .join() to form a string for the comparison:
if(row.slice(0,2).join() == newData[j].slice(0,2).join()){
duplicate = true;
}
Sorting
Another issue you'll run into is your placement of range.sort(). It's dangerous to sort before removing duplicates, because you can't guarantee the order of newly-retrieved event data vs data for the same event that you already had which may have additional columns of information. As a result, the duplicate removal could erase your user-entered info. It's safer to leave sorting to be a final step, or include additional columns in the sort to guarantee order.
Another little issue: The use of .getRange("A3:F2000") has a side-effect of expanding your spreadsheet to 2000 lines; you could instead use .getRange("A3:F"), which will retrieve the maximum range without expanding it.
But I suggest you do the sorting entirely with javascript arrays, which will be much faster than using spreadsheet services. I'm assuming you have two lines of header information that you want to retain at the top of the sheet, since you were sorting from A3. Here's a way to do the sort:
// sort ascending by date - retain header lines
var headers = newData.slice(0,2);
var sorted = newData.slice(2).sort(sortFunction);
var newData = headers.concat(sorted);
Where sortFunction() is:
function sortFunction( a, b ) {
// coerce dates to numbers and return comparison
return ((+a[0]) - (+b[0]))
}
Final Script
Here's what you get, with the above changes.
function importEvents(){
var calID = Browser.inputBox("Please enter your google Cal ID", Browser.Buttons.OK_CANCEL);
var startdate = Browser.inputBox("Start date using 1/1/2013 format", Browser.Buttons.OK_CANCEL);
var cal = CalendarApp.getCalendarById(calID);
var events_sheet = SpreadsheetApp.getActiveSheet();
var events = cal.getEvents(new Date(startdate), new Date("1/1/2014"));
var lr = events_sheet.getLastRow();
var eventarray = new Array();
var i = 0; // edited
for (i = 0; i < events.length; i++) {
line = new Array();
line.push(events[i].getStartTime());
line.push(events[i].getTitle());
//Potential more data that I am not getting at this time
// line.push(events[i].getDescription());
// line.push(events[i].getEndTime());
eventarray.push(line);
}
// Append the retreived events to existing spreadsheet
events_sheet.getRange("A"+(lr+1)+":B"+(lr+i)).setValues(eventarray);
//remove duplicate rows
var sheet = SpreadsheetApp.getActiveSheet();
var data = sheet.getDataRange().getValues();
var newData = new Array();
for(i in data){
var row = data[i];
var duplicate = false;
for(j in newData){
if(row.slice(0,2).join() == newData[j].slice(0,2).join()){
duplicate = true;
}
}
if(!duplicate){
newData.push(row);
}
}
// sortFunction used to compare rows of data in our newData array
function sortFunction( a, b ) {
// coerce dates to numbers and return comparison
return ((+a[0]) - (+b[0]))
}
// sort ascending by date - retain header lines
var headers = newData.slice(0,2);
var sorted = newData.slice(2).sort(sortFunction);
var newData = headers.concat(sorted);
// Clear the existing info and update with newData.
sheet.clearContents();
sheet.getRange(1, 1, newData.length, newData[0].length).setValues(newData);
}
I would suggest that you stay at array level all along, including the sort process.
You can easily choose on what cell you sort by using a sort function like below :
// This example sorts on first column of data array
data.sort(function(x,y){
var xp = x[0];
var yp = y[0];
//Logger.log(xp+' '+yp);// just to check that it takes the right column
return xp == yp ? 0 : xp < yp ? -1 : 1;// first sort ascending
});
As for the duplicate removal there are a few ways to do it, one possible is like that :
var newData = new Array();
for(var i in data){
var duplicate = false;
for(var j in newData){
if(data[i][0].toString()+data[i].toString() == newData[j][0].toString()+newData[j][1].toString()){ duplicate = true }
}
if(!duplicate){ newData.push(data[i]) }
}

Resources