Finding count of duplicate numbers in a sorted array


The algorithm for finding the count of duplicate items makes use of binary search to find the index of the first occurrence and the index of the last occurrence of the item to be searched. Using these 2 indices, we can find the count of duplicate elements.

  • Finding the first occurrence
    If the searched item located at index mid and its previous item (i.e at index mid - 1) match, binary search continues in the sorted space to the left side of index mid i.e from index beg till index mid-1. This way the search continues until the previous item is the same as the item to be searched. When the search terminates we get the index of the first occurrence.

  • Finding the last occurrence
    If the searched item located at index mid and its next item (i.e at index mid + 1) matches the searched value, the search continues in the sorted space to the right side of mid i.e from index mid + 1 till index end. When the search terminates we get the index of the last occurrence.

      Thus, count of duplicate items = Index of the last occurrence - Index of the first occurrence + 1


Example

Binary_Search_Duplicates

Time complexity : Log ( N ), as we use the binary search algorithm at the core for finding the count of duplicate numbers.

Why is mid calculated as mid = beg + ( end - beg ) / 2 ?


Program for finding the count of duplicate numbers within a sorted array using binary search

def First ( array, n ) :
    
    beg = 0
    end = len(array) - 1

    while (beg <= end) :

        mid = int (beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid-1 >= 0 and array[mid-1] == n) :
               end = mid-1
               continue

            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1 

    return -1  

def Last (array, n) :
    
    beg = 0
    end = len(array)-1

    while (beg <= end) :

        mid = int(beg + (end-beg)/2)

        if (array[mid] == n) :
            if (mid+1 < len(array) and array[mid+1] == n) :
               beg = mid + 1 
               continue
            return mid 

        elif (array[mid] < n) :
            beg = mid + 1 
        else :
            end = mid - 1

    return -1; 

array = [ 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 ]
n = int (input("Enter the number : "))

first_index = First (array, n)
last_index  = Last (array, n)

if (first_index == -1 or last_index == - 1) :
    print("Element does not exist")
else :
    print("First occurrence of " + str(n) + " is at index : " + str(first_index))
    print("Last occurrence of " + str(n) + " is at index : " + str(last_index))
    print("Total count : "+ str(last_index - first_index + 1)) 

Output

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4

Enter the number : 10
First occurrence of 10 is at index : 7
Last occurrence of 10 is at index : 8
Total count : 2

Enter the number : 5
Element does not exist

Enter the number : 12
First occurrence of 12 is at index : 9
Last occurrence of 12 is at index : 9
Total count : 1
#include<iostream>
#include<vector>

using namespace std;

int First (vector<int>& vec, int n) {

    int beg = 0;
    int end = vec.size() - 1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid-1 >= 0 and vec[mid-1] == n) {
               end = mid-1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int Last (vector<int>& vec, int n) {

    int beg = 0;
    int end = vec.size()-1;

    while (beg <= end) {

        int mid = beg + (end-beg)/2;

        if (vec[mid] == n) {
            if (mid+1 < vec.size() and  vec[mid+1] == n) {
               beg = mid + 1;
               continue;
            }
            return mid;
        }

        else if (vec[mid] < n)
            beg = mid + 1;
        else
            end = mid - 1;
    }
    return -1;
}

int main() {
   vector<int> vec = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };

   int first_index, last_index, n;
   cout << "Enter the number : ";
   cin >> n;

   first_index = First (vec, n);
   last_index =  Last (vec, n);

   if (first_index == -1 or last_index == - 1) {
       cout << "Element does not exist" << endl;
   } else {
       cout << "First occurrence of " << n << " is at index : " << first_index << endl;
       cout << "Last occurrence of " << n << " is at index : " << last_index << endl;
       cout << "Total count : " << last_index - first_index + 1;
   }
   return 0;
}

Output

Enter the number : 13
First occurrence of 13 is at index : 10
Last occurrence of 13 is at index : 10
Total count : 1

Enter the number : 8
Element does not exist

Enter the number : 5
Element does not exist

Enter the number : 3
First occurrence of 3 is at index : 2
Last occurrence of 3 is at index : 2
Total count : 1

Enter the number : 9
First occurrence of 9 is at index : 3
Last occurrence of 9 is at index : 6
Total count : 4
public class FindDuplicates {

    int FirstOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid-1 >= 0 && array[mid-1] == n) {
                   end = mid-1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    int LastOccurrence (int[] array, int n) {

        int beg = 0;
        int end = array.length - 1;

        while (beg <= end) {

            int mid = beg + (end-beg)/2;

            if (array[mid] == n) {
                if (mid+1 < array.length && array[mid+1] == n) {
                   beg = mid + 1;
                   continue;
                }
                return mid;
            }

            else if (array[mid] < n)
                beg = mid + 1;
            else
                end = mid - 1;
        }
        return -1;
    }

    public static void main (String[] args) {

        int[] array = { 1, 2, 3, 9, 9, 9, 9, 10, 10, 12, 13 };
        int first_index, last_index, n;
        n = 9;

        FindDuplicates fd = new FindDuplicates();

        first_index = fd.FirstOccurrence(array, n);
        last_index =  fd.LastOccurrence(array, n);

        if (first_index == -1 || last_index == - 1) {
            System.out.println("Element does not exist");
        } else {
            System.out.println("First Occurrence of " + n + " is at index : " + first_index);
            System.out.println("Last Occurrence of " + n + " is at index : " + last_index);
            System.out.println("Total Count : "+ (last_index - first_index + 1));
        }

        fd = null;
    }
}

Output

First Occurrence of 9 is at index : 3
Last Occurrence of 9 is at index : 6
Total Count : 4



Copyright (c) 2019-2023, Algotree.org.
All rights reserved.