// A program using pyramid scaling, Canny, contours and contour simplification
// to find squares in the input image.
#include <iostream>
using namespace cv;
using namespace std;
// Passed by findSquares() as the second threshold argument of Canny().
int thresh = 50;

// Number of passes of the inner level loop that findSquares() runs per channel.
int N = 11;

// Title string of the demo; not referenced anywhere else in the visible code.
const char* wndname = "Square Detection Demo";
{
    double dx1 = pt1.
x - pt0.
x;
 
    double dy1 = pt1.
y - pt0.
y;
 
    double dx2 = pt2.
x - pt0.
x;
 
    double dy2 = pt2.
y - pt0.
y;
 
    return (dx1*dx2 + dy1*dy2)/
sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10);
 
}
// Collects 4-point polygons into `squares`.
//
// image:   input image. NOTE(review): it is not referenced anywhere in the
//          visible body -- the preprocessing that should consume it appears
//          to be missing from this listing.
// squares: output; cleared on entry, then appended to.
//
// NOTE(review): this function does not compile as shown. `gray0` and `gray`
// are never declared, `contours` and `approx` are never filled, and the
// condition of the `if` that tests `approx.size()` is cut off after `&&`.
// The missing statements must be restored from the upstream file before this
// code can be relied on.
static void findSquares( 
const UMat& image, vector<vector<Point> >& squares )
 
{
    // Start from an empty result on every call.
    squares.clear();
    
    vector<vector<Point> > contours;
    
    // One pass per channel index 0, 1, 2.
    for( int c = 0; c < 3; c++ )
    {
        // Index pair {c, 0}. NOTE(review): `ch` is unused in the visible
        // code; presumably it maps source channel c to destination channel 0
        // in a channel-extraction call that is missing here -- confirm.
        int ch[] = {c, 0};
        
        // N passes per channel.
        for( int l = 0; l < N; l++ )
        {
            
            // The first pass (l == 0) is handled differently from the others.
            if( l == 0 )
            {
                
                // Canny is called with 0 and `thresh` as its two threshold
                // arguments and 5 as its last argument.
                Canny(gray0, gray, 0, thresh, 5);
 
                
                
            }
            else
            {
                
                // NOTE(review): empty in the visible code; nothing is done for l != 0.
            }
            
            vector<Point> approx;
            
            // NOTE(review): `contours` is never populated in the visible
            // code, so as written this loop body would never execute.
            for( size_t i = 0; i < contours.size(); i++ )
            {
                
                
                
                
                
                
                
                // NOTE(review): the condition below is truncated after `&&`;
                // the remaining operands and the closing parenthesis are missing.
                if( approx.size() == 4 &&
                {
                    // Largest absolute value returned by angle() over the
                    // corners examined below.
                    double maxCosine = 0;
                    // j = 2, 3, 4: angle() receives approx[j-1] (indices
                    // 1, 2, 3) as its third argument together with the two
                    // neighbouring vertices; j%4 wraps index 4 back to 0.
                    for( int j = 2; j < 5; j++ )
                    {
                        
                        double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1]));
                        maxCosine = 
MAX(maxCosine, cosine);
                    }
                    
                    
                    // Keep the polygon only when every value computed above
                    // is below 0.3.
                    if( maxCosine < 0.3 )
                        squares.push_back(approx);
                }
            }
        }
    }
}
// Visits every polygon in `squares`, taking a pointer to its first vertex and
// its vertex count.
//
// _image:  target image. NOTE(review): it is never touched in the visible
//          code.
// squares: polygons to process; each entry is indexed at [0], so entries are
//          expected to be non-empty.
//
// NOTE(review): the pointer and count are computed but never used, so nothing
// is actually drawn. Presumably the drawing call that consumed them is
// missing from this listing -- restore it from the upstream file.
static void drawSquares( UMat& _image, const vector<vector<Point> >& squares )
{
    for( size_t idx = 0; idx != squares.size(); ++idx )
    {
        const vector<Point>& polygon = squares[idx];

        const Point* firstVertex = &polygon[0];
        int vertexCount = (int)polygon.size();
    }
}
// Returns a copy of `image` that has been passed through drawSquares()
// together with `sqs`; the caller's image is left unmodified.
//
// image: source image (read-only).
// sqs:   polygons forwarded to drawSquares().
static UMat drawSquaresBoth( const UMat& image,
                            const vector<vector<Point> >& sqs)
{
    // `imgToShow` was used without ever being declared. drawSquares() needs a
    // mutable UMat while `image` is const, so work on a deep copy.
    UMat imgToShow = image.clone();

    drawSquares(imgToShow, sqs);
    return imgToShow;
}
// Entry point: handles the command-line options described in `keys`, runs
// findSquares() once as a warm-up and then `iterations` more times, prints an
// average time and builds the annotated result image.
//
// NOTE(review): this function does not compile as shown. `cmd`, `inputName`,
// `image` and `t_cpp` are used but never declared in the visible code, and
// `outfile` and `result` are never used after being set. The statements that
// parse the arguments, load the image, measure the time and show/save the
// result appear to be missing from this listing.
int main(int argc, char** argv)
{
    // Option table: short name, long name | default value | help text.
    const char* keys =
        "{ i input    | ../data/pic1.png   | specify input image }"
        "{ o output   | squares_output.jpg | specify output save path}"
        "{ h help     |                    | print help message }"
        "{ m cpu_mode |                    | run without OpenCL }";
    // --help: print usage and exit successfully.
    if(cmd.has("help"))
    {
        cout << "Usage : " << argv[0] << " [options]" << endl;
        cout << "Available options:" << endl;
        cmd.printMessage();
        return EXIT_SUCCESS;
    }
    // NOTE(review): only a message is printed here; nothing in the visible
    // code actually disables OpenCL.
    if (cmd.has("cpu_mode"))
    {
        cout << "OpenCL was disabled" << endl;
    }
    string outfile = cmd.get<string>("o");
    // Number of timed runs; also the divisor of the reported average.
    int iterations = 10;
    vector<vector<Point> > squares;
    // NOTE(review): this block has no guarding condition, so as written it
    // always runs and main() returns EXIT_FAILURE before any detection.
    // Presumably an image-load check is missing in front of it -- confirm.
    {
        cout << "Couldn't load " << inputName << endl;
        cmd.printMessage();
        return EXIT_FAILURE;
    }
    int j = iterations;
    
    // One untimed call before the measured loop.
    cout << "warming up ..." << endl;
    findSquares(image, squares);
    // Counts j down from `iterations` to 1, i.e. runs `iterations` times.
    do
    {
        findSquares(image, squares);
        cout << "run loop: " << j << endl;
    }
    while(--j);
    // Converts the tick total to milliseconds and averages over the runs.
    // NOTE(review): `t_cpp` is never declared or accumulated in the visible code.
    cout << 
"average time: " << 1000.0f * (double)t_cpp / 
getTickFrequency() / iterations << 
"ms" << endl;
    UMat result = drawSquaresBoth(image, squares);
 
    return EXIT_SUCCESS;
}