steve steve - 4 months ago 10
MySQL Question

mysql query - not scaling well, slow with 15k records

I have a large SQL query (mysql) used to populate a table of records in a CRM system.

This has been working well and quite fast with around 4000 records. Now reaching 15,500 it's running painfully slow. Taking around 70 seconds to return data.

I have tried adding some indexes, but with limited success. Any suggestions?

The query is:

SELECT SQL_NO_CACHE SQL_CALC_FOUND_ROWS
s.*,
ca2.address_postcode,
ca2.resident_status
FROM
(
SELECT
a.id,
c.name_first,
c.name_last,
a.created as app_created,
a.edited,
a.comments as custcomment,
c.name_company,
a.loan_amount,
a.product_type,
ap.packager as placed_with,
a.introducer_id,
a.status,
c.contact_number,
c.email,
a.database,
bd.legal_structure,
(
SELECT
ca1.id
FROM
cl_customer_address AS ca1
WHERE
ca1.customer_id = a.customer_id AND
ca1.deleted = "0000-00-00 00:00:00"
ORDER BY
ca1.created DESC
LIMIT
1
) AS address_id
FROM
cl_application AS a
LEFT JOIN
cl_application_business_details as bd on bd.id = a.id
LEFT JOIN
cl_customer AS c ON c.id = a.customer_id AND c.deleted = c.deleted
LEFT JOIN
cl_application_packager AS ap ON ap.application_id = a.id AND ap.status != "Declined" AND ap.deleted = "0000-00-00 00:00:00"
WHERE
(a.deleted = "0000-00-00 00:00:00")
GROUP BY
a.id
) AS s
LEFT JOIN
cl_customer_address AS ca2 ON ca2.id = s.address_id AND ca2.deleted = ca2.deleted
WHERE
(ca2.deleted = ca2.deleted OR ca2.deleted IS NULL)
ORDER BY
app_created DESC
LIMIT
0, 100;

Time: 70.382
Rows: 100


The table descriptions are:

CREATE TABLE cl_application (
id int(11) NOT NULL AUTO_INCREMENT,
customer_id int(11) NOT NULL,
product_type tinytext COLLATE utf8_unicode_ci,
introducer_id int(11) NOT NULL,
loan_amount double NOT NULL,
loan_purpose tinytext COLLATE utf8_unicode_ci NOT NULL,
comments text COLLATE utf8_unicode_ci NOT NULL,
security_value double NOT NULL,
property_address_1 tinytext COLLATE utf8_unicode_ci NOT NULL,
property_address_2 tinytext COLLATE utf8_unicode_ci NOT NULL,
property_town_city tinytext COLLATE utf8_unicode_ci NOT NULL,
property_postcode varchar(8) COLLATE utf8_unicode_ci NOT NULL,
property_country tinytext COLLATE utf8_unicode_ci NOT NULL,
application_source tinytext COLLATE utf8_unicode_ci NOT NULL,
`status` enum('WK - Working Lead','APP - Application Taken','ISS - Pack Issued','HOT - Head Of Terms Sent','APU - Application underway','OFI - Offer Issued','DIP - Deal in Progress','DUS - Declined Unsecured/Trying Secured','DUG - Declined Unsecured/Trying Guarantor','COM - Completed Awaiting Payment','PAC - Paid and Completed','TD - Turned Down','DUP - Duplicate application') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'WK - Working Lead',
multi_stage_status text COLLATE utf8_unicode_ci NOT NULL,
owner_id int(11) NOT NULL,
token tinytext COLLATE utf8_unicode_ci NOT NULL,
cache_keyword text COLLATE utf8_unicode_ci NOT NULL,
old_id int(11) NOT NULL,
placed_with tinytext COLLATE utf8_unicode_ci NOT NULL,
submitted datetime NOT NULL,
`database` enum('LV','TD','CD','UL') COLLATE utf8_unicode_ci NOT NULL DEFAULT 'LV',
snoozed datetime NOT NULL DEFAULT '0000-00-00 00:00:00',
created datetime NOT NULL,
edited datetime NOT NULL,
deleted datetime NOT NULL,
PRIMARY KEY (id),
FULLTEXT KEY cache_keyword (cache_keyword)
) ;

CREATE TABLE cl_application_packager (
id int(11) NOT NULL AUTO_INCREMENT,
application_id int(11) NOT NULL,
packager tinytext COLLATE utf8_unicode_ci,
packager_id int(11) DEFAULT NULL,
amount double DEFAULT NULL,
`status` enum('In Progress','Declined','Accepted','Completed') COLLATE utf8_unicode_ci DEFAULT 'In Progress',
commision double DEFAULT NULL,
created datetime NOT NULL,
edited datetime NOT NULL,
deleted datetime NOT NULL,
PRIMARY KEY (id),
KEY application_id (deleted)
) ;

CREATE TABLE cl_application_business_details (
id int(11) NOT NULL AUTO_INCREMENT,
company_number tinytext COLLATE utf8_unicode_ci,
legal_structure enum('Ltd','LLP','Partnership','Sole Trader') COLLATE utf8_unicode_ci DEFAULT NULL,
incorporated date DEFAULT NULL,
created datetime NOT NULL,
edited datetime NOT NULL,
deleted datetime NOT NULL,
PRIMARY KEY (id)
) ;

CREATE TABLE cl_customer (
id int(11) NOT NULL AUTO_INCREMENT,
title enum('Mr','Mrs','Ms','Miss') COLLATE utf8_unicode_ci NOT NULL,
name_first tinytext COLLATE utf8_unicode_ci NOT NULL,
name_last tinytext COLLATE utf8_unicode_ci NOT NULL,
dob date NOT NULL,
marital_status tinytext COLLATE utf8_unicode_ci NOT NULL,
name_company tinytext COLLATE utf8_unicode_ci NOT NULL,
email tinytext COLLATE utf8_unicode_ci NOT NULL,
alt_email tinytext COLLATE utf8_unicode_ci,
contact_number tinytext COLLATE utf8_unicode_ci NOT NULL,
home_phone_number tinytext COLLATE utf8_unicode_ci NOT NULL,
work_phone_number tinytext COLLATE utf8_unicode_ci NOT NULL,
created datetime NOT NULL,
edited datetime NOT NULL,
deleted datetime NOT NULL,
PRIMARY KEY (id)
) ;

CREATE TABLE cl_customer_address (
id int(11) NOT NULL AUTO_INCREMENT,
customer_id int(11) NOT NULL,
application_id int(11) NOT NULL,
house_name tinytext COLLATE utf8_unicode_ci NOT NULL,
house_number tinytext COLLATE utf8_unicode_ci NOT NULL,
address_line_1 tinytext COLLATE utf8_unicode_ci NOT NULL,
address_line_2 tinytext COLLATE utf8_unicode_ci NOT NULL,
address_town tinytext COLLATE utf8_unicode_ci NOT NULL,
address_postcode tinytext COLLATE utf8_unicode_ci NOT NULL,
moved_in date NOT NULL,
vacated date NOT NULL DEFAULT '0000-00-00',
ptcabs tinytext COLLATE utf8_unicode_ci NOT NULL,
resident_status tinytext COLLATE utf8_unicode_ci NOT NULL,
created datetime NOT NULL,
edited datetime NOT NULL,
deleted datetime NOT NULL,
PRIMARY KEY (id),
KEY customer_id (customer_id,deleted)
) ;


mysql> describe cl_application
-> ;
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+-----+---------------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+-----+---------------------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| customer_id | int(11) | NO | | NULL | |
| product_type | tinytext | YES | | NULL | |
| introducer_id | int(11) | NO | | NULL | |
| loan_amount | double | NO | | NULL | |
| loan_purpose | tinytext | NO | | NULL | |
| comments | text | NO | | NULL | |
| security_value | double | NO | | NULL | |
| property_address_1 | tinytext | NO | | NULL | |
| property_address_2 | tinytext | NO | | NULL | |
| property_town_city | tinytext | NO | | NULL | |
| property_postcode | varchar(8) | NO | | NULL | |
| property_country | tinytext | NO | | NULL | |
| application_source | tinytext | NO | | NULL | |
| status | enum('WK - Working Lead','APP - Application Taken','ISS - Pack Issued','HOT - Head Of Terms Sent','APU - Application underway','OFI - Offer Issued','DIP - Deal in Progress','DUS - Declined Unsecured/Trying Secured','DUG - Declined Unsecured/Trying Guarantor','COM - Completed Awaiting Payment','PAC - Paid and Completed','TD - Turned Down','DUP - Duplicate application') | NO | | WK - Working Lead | |
| multi_stage_status | text | NO | | NULL | |
| owner_id | int(11) | NO | | NULL | |
| token | tinytext | NO | | NULL | |
| cache_keyword | text | NO | MUL | NULL | |
| old_id | int(11) | NO | | NULL | |
| placed_with | tinytext | NO | | NULL | |
| submitted | datetime | NO | | NULL | |
| database | enum('LV','TD','CD','UL') | NO | | LV | |
| snoozed | datetime | NO | | 0000-00-00 00:00:00 | |
| created | datetime | NO | | NULL | |
| edited | datetime | NO | | NULL | |
| deleted | datetime | NO | | NULL | |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------+-----+---------------------+----------------+
27 rows in set (0.00 sec)


--

mysql> describe cl_application_business_details;
+-----------------+-----------------------------------------------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+-----------------+-----------------------------------------------+------+-----+---------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| company_number | tinytext | YES | | NULL | |
| legal_structure | enum('Ltd','LLP','Partnership','Sole Trader') | YES | | NULL | |
| incorporated | date | YES | | NULL | |
| created | datetime | NO | | NULL | |
| edited | datetime | NO | | NULL | |
| deleted | datetime | NO | | NULL | |
+-----------------+-----------------------------------------------+------+-----+---------+----------------+
7 rows in set (0.00 sec)


--

mysql> describe cl_customer;
+-------------------+------------------------------+------+-----+---------+----------------+
| Field | Type | Null | Key | Default | Extra |
+-------------------+------------------------------+------+-----+---------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| title | enum('Mr','Mrs','Ms','Miss') | NO | | NULL | |
| name_first | tinytext | NO | | NULL | |
| name_last | tinytext | NO | | NULL | |
| dob | date | NO | | NULL | |
| marital_status | tinytext | NO | | NULL | |
| name_company | tinytext | NO | | NULL | |
| email | tinytext | NO | | NULL | |
| alt_email | tinytext | YES | | NULL | |
| contact_number | tinytext | NO | | NULL | |
| home_phone_number | tinytext | NO | | NULL | |
| work_phone_number | tinytext | NO | | NULL | |
| created | datetime | NO | | NULL | |
| edited | datetime | NO | | NULL | |
| deleted | datetime | NO | | NULL | |
+-------------------+------------------------------+------+-----+---------+----------------+
15 rows in set (0.00 sec)


--

mysql> describe cl_application_packager;
+----------------+-------------------------------------------------------+------+-----+-------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+----------------+-------------------------------------------------------+------+-----+-------------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| application_id | int(11) | NO | | NULL | |
| packager | tinytext | YES | | NULL | |
| packager_id | int(11) | YES | | NULL | |
| amount | double | YES | | NULL | |
| status | enum('In Progress','Declined','Accepted','Completed') | YES | | In Progress | |
| commision | double | YES | | NULL | |
| created | datetime | NO | | NULL | |
| edited | datetime | NO | | NULL | |
| deleted | datetime | NO | MUL | NULL | |
+----------------+-------------------------------------------------------+------+-----+-------------+----------------+
10 rows in set (0.00 sec)

mysql> describe cl_customer_address;
+------------------+----------+------+-----+------------+----------------+
| Field | Type | Null | Key | Default | Extra |
+------------------+----------+------+-----+------------+----------------+
| id | int(11) | NO | PRI | NULL | auto_increment |
| customer_id | int(11) | NO | MUL | NULL | |
| application_id | int(11) | NO | | NULL | |
| house_name | tinytext | NO | | NULL | |
| house_number | tinytext | NO | | NULL | |
| address_line_1 | tinytext | NO | | NULL | |
| address_line_2 | tinytext | NO | | NULL | |
| address_town | tinytext | NO | | NULL | |
| address_postcode | tinytext | NO | | NULL | |
| moved_in | date | NO | | NULL | |
| vacated | date | NO | | 0000-00-00 | |
| ptcabs | tinytext | NO | | NULL | |
| resident_status | tinytext | NO | | NULL | |
| created | datetime | NO | | NULL | |
| edited | datetime | NO | | NULL | |
| deleted | datetime | NO | | NULL | |
+------------------+----------+------+-----+------------+----------------+
16 rows in set (0.00 sec)


Explain Output:

id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <derived2> ALL 178913936 Using filesort
1 PRIMARY ca2 eq_ref PRIMARY PRIMARY 4 s.address_id 1 Using where
2 DERIVED a ALL PRIMARY, cache_keyword 14494 Using where; Using temporary; Using filesort
2 DERIVED bd eq_ref PRIMARY PRIMARY 4 s-choiceloans.a.id 1
2 DERIVED c eq_ref PRIMARY PRIMARY 4 s-choiceloans.a.customer_id 1 Using where
2 DERIVED ap ALL 12344 Using where; Using join buffer (Block Nested Loop)
3 DEPENDENT SUBQUERY ca1 ref customer_id customer_id 9 func,const 1 Using where; Using filesort

Answer

The biggest suspect is the inline view (or derived table, in the MySQL venacular.) MySQL is going to materialize the derived table, and then the outer query will run against that.

The other suspect is the correlated subquery in the SELECT list. That's going to be executed for every row returned by the outer query.

Also, the ORDER BY app_created is going to require a Using filesort operation (unless you're running a more recent version of MySQL that might build an index on the derived table.)

There's some odd predicates, e.g. c.deleted = c.deleted

That's going to be true for every row that has a non-NULL value in the deleted column. That's equivalent to c.deleted IS NOT NULL.

And for this: ca2.deleted = ca2.deleted OR ca2.deleted IS NULL, that's always going to be true.

Neither of those should have much impact on performance however.


Run EXPLAIN to see the execution plan.

Absent that, here's a first cut at some recommendations:

Add a covering index on c1_application_packager

  CREATE INDEX c1_application_packager_IX2 ON c1_application_packager 
    (application_id, deleted, status, packager)
  ;

Add a covering index on cl_customer_address

  CREATE INDEX c1_customer_address_IX2 ON c1_customer_address
    (customer_id, deleted, created, id, address_postcode, resident_status)
  ;

And re-write the query to eliminate the derived table. Replace the join to ca2 with the same correlated subquery you used to return the id from c1_customer_address...

   SELECT a.id
        , c.name_first
        , c.name_last
        , a.created           AS app_created
        , a.edited
        , a.comments          AS custcomment
        , c.name_company
        , a.loan_amount
        , a.product_type
        , ap.packager         AS placed_with
        , a.introducer_id
        , a.status
        , c.contact_number
        , c.email
        , a.database
        , bd.legal_structure
        , ( SELECT ca1.id
              FROM cl_customer_address ca1
             WHERE ca1.customer_id = a.customer_id
               AND ca1.deleted = '0000-00-00 00:00:00'
             ORDER BY ca1.customer_id, ca1.deleted, ca1.created DESC
             LIMIT 1
          ) AS address_id
        , ( SELECT ca2.address_postcode
              FROM cl_customer_address ca2
             WHERE ca2.customer_id = a.customer_id
               AND ca2.deleted = '0000-00-00 00:00:00'
             ORDER BY ca2.customer_id, ca2.deleted, ca2.created DESC
             LIMIT 1
          ) AS address_postcode
        , ( SELECT ca3.resident_status
              FROM cl_customer_address ca3
             WHERE ca3.customer_id = a.customer_id
               AND ca3.deleted = '0000-00-00 00:00:00'
             ORDER BY ca3.customer_id, ca3.deleted, ca3.created DESC
             LIMIT 1
          ) AS resident_status
   FROM cl_application a
   LEFT
   JOIN cl_application_business_details bd
     ON bd.id = a.id
   LEFT
   JOIN cl_customer c
     ON c.id = a.customer_id
    AND c.deleted IS NOT NULL
   LEFT
   JOIN cl_application_packager ap
     ON ap.application_id = a.id
    AND ap.status != 'Declined'
    AND ap.deleted = '0000-00-00 00:00:00'
  WHERE a.deleted = '0000-00-00 00:00:00'
  GROUP BY a.created, a.id
  ORDER BY a.created, a.id

Those correlated subqueries in the SELECT list are going to be executed for every row returned by the query, so that's going to be expensive.

Now we want to see if we can get an index on c1_application table that will help us avoid a Using filesort operation (to satisfy the ORDER BY and the GROUP BY clauses.)

  CREATE INDEX c1_application_IX2 ON c1_application
    (deleted, created, id)
  ;

The query relies on a MySQL-specific extension to GROUP BY behavior, not throwing an error due to non-aggregates in the SELECT list which don't appear in the GROUP BY. If there is more than one "matching" row from c1_customer or c1_application_packager, which of the rows is returned out of the GROUP BY operation is indeterminate.

No guarantee that these changes will have a positive impact on performance. (Performance could be a whole lot worse.)

Again, run EXPLAIN to see the execution plan, and adjust from there.

The next big hump is those correlated subqueries. For a shot at decent performance, it's imperative that a suitable index is available. (A proposal for a suitable covering index already specified above.)

The next cut would be eliminate the correlated subqueries from the SELECT list, If the id column from c1_customer_address was being returned as a means to get the address_postcode and resident_status, that first correlated subquery could be eliminated.

FOLLOWUP

Removing the correlated subqueries... adding an inline view lc to get the latest created date from c1_customer_address (for each customer_id), and another join to c1_customer_address to retrieve the rows with that latest created date (for each customer).

The lc inline view introduces a "derived table", which can be expensive for large sets, but this may be faster than using correlated subqueries.

  SELECT a.id
       , c.name_first
       , c.name_last
       , a.created             AS app_created
       , a.edited
       , a.comments            AS custcomment
       , c.name_company
       , a.loan_amount
       , a.product_type
       , ap.packager           AS placed_with
       , a.introducer_id
       , a.status
       , c.contact_number
       , c.email
       , a.database
       , bd.legal_structure
       , ca.id                 AS address_id
       , ca.address_postcode
       , ca.resident_status 
  FROM cl_application a
  LEFT
  JOIN cl_application_business_details bd
    ON bd.id = a.id
  LEFT
  JOIN cl_customer c
    ON c.id = a.customer_id
   AND c.deleted IS NOT NULL
  LEFT
  JOIN cl_application_packager ap
    ON ap.application_id = a.id
   AND ap.status != 'Declined'
   AND ap.deleted = '0000-00-00 00:00:00'
  LEFT
  JOIN ( SELECT ca1.customer_id
              , MAX(ca1.created) AS latest_created
           FROM cl_customer_address ca1
          WHERE ca1.deleted = '0000-00-00 00:00:00'
          GROUP BY ca1.customer_id
       ) lc
    ON lc.customer_id = a.customer_id
  LEFT
  JOIN cl_customer_address ca
    ON ca.customer_id = lc.customer_id
   AND ca.created     = lc.latest_created
   AND ca.deleted     = '0000-00-00 00:00:00'
 WHERE a.deleted = '0000-00-00 00:00:00'
 GROUP BY a.created, a.id
 ORDER BY a.created, a.id
Comments