I've been trying to create a feature analysis for our app by counting specific attributes of users from given database tables and store it in an analytical view via SQL CTE's
As soon as I add more than two CTEs to my data model the final output changes from the original correct output.
For example when I run in the beginning:
WITH USER_INFO AS
(
SELECT
ID AS ACCOUNT_ID,
STATUS,
CREATED_AT AS REGISTRATION_DATE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
WHERE
STATUS IS NULL
),
WEIGHT_LOG AS
(
SELECT
wl.ID AS WEIGHT_LOG_ID,
wl.ACCOUNT_ID,
wl.WEIGHT,
wl.CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
LEFT JOIN
USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
WHERE
wl.CREATED_AT > ui.REGISTRATION_DATE
)
SELECT
ui.ACCOUNT_ID,
COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
FROM
USER_INFO ui
LEFT JOIN
WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
GROUP BY
ui.ACCOUNT_ID
ORDER BY
ui.ACCOUNT_ID;
I get the correct count of rows for each user in the weight_log
table in our data base minus the entry that is being generated during the onboarding process (registration date).
Pre_Join Table View
Account_ID 879 = 4
Account_ID 881 = 1
Now when I try to add another column to my final select statement Reflection_Count
which represents the total count of distinct reflections a user has completed my data for my weight log users change and is wrong.
WITH USER_INFO AS
(
SELECT
ID AS ACCOUNT_ID,
STATUS,
CREATED_AT AS REGISTRATION_DATE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
WHERE
STATUS IS NULL
),
WEIGHT_LOG AS
(
SELECT
wl.ID AS WEIGHT_LOG_ID,
wl.ACCOUNT_ID,
wl.WEIGHT,
wl.CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
LEFT JOIN
USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
WHERE
wl.CREATED_AT > ui.REGISTRATION_DATE
),
REFLECTIONS AS
(
WITH REFLECTION_PROGRESS_LOG AS
(
SELECT
ACCOUNT_ID,
ID AS REFLECTION_PROGRESS_ID,
EXTERNAL_REFLECTION_ID,
CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION_PROGRESS
ORDER BY
ACCOUNT_ID
),
REFLECTION_DETAILS AS
(
SELECT
ID AS REFLECTION_ID,
EXTERNAL_ID,
TITLE AS REFLECTION_TITLE,
LANGUAGE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION
WHERE
LANGUAGE = 'en'
)
SELECT
rpl.ACCOUNT_ID,
rpl.REFLECTION_PROGRESS_ID,
rpl.EXTERNAL_REFLECTION_ID,
r.REFLECTION_TITLE,
rpl.CREATED_AT
FROM
REFLECTION_PROGRESS_LOG rpl
LEFT JOIN
REFLECTION_DETAILS r ON rpl.EXTERNAL_REFLECTION_ID = r.EXTERNAL_ID
ORDER BY
ACCOUNT_ID,
REFLECTION_PROGRESS_ID
)
SELECT
ui.ACCOUNT_ID,
COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
COUNT(DISTINCT r.EXTERNAL_REFLECTION_ID) AS REFLECTION_COUNT
FROM
USER_INFO ui
LEFT JOIN
WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
LEFT JOIN
REFLECTIONS r ON ui.ACCOUNT_ID = r.ACCOUNT_ID
GROUP BY
ui.ACCOUNT_ID
ORDER BY
ui.ACCOUNT_ID;
Post_Join Table View
Account_ID 879 = 12
Account_ID 881 = 1
I can't seem to locate the issue and would appreciate any insights.
Thanks
I've been trying to create a feature analysis for our app by counting specific attributes of users from given database tables and store it in an analytical view via SQL CTE's
As soon as I add more than two CTEs to my data model the final output changes from the original correct output.
For example when I run in the beginning:
WITH USER_INFO AS
(
SELECT
ID AS ACCOUNT_ID,
STATUS,
CREATED_AT AS REGISTRATION_DATE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
WHERE
STATUS IS NULL
),
WEIGHT_LOG AS
(
SELECT
wl.ID AS WEIGHT_LOG_ID,
wl.ACCOUNT_ID,
wl.WEIGHT,
wl.CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
LEFT JOIN
USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
WHERE
wl.CREATED_AT > ui.REGISTRATION_DATE
)
SELECT
ui.ACCOUNT_ID,
COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
FROM
USER_INFO ui
LEFT JOIN
WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
GROUP BY
ui.ACCOUNT_ID
ORDER BY
ui.ACCOUNT_ID;
I get the correct count of rows for each user in the weight_log
table in our data base minus the entry that is being generated during the onboarding process (registration date).
Pre_Join Table View
Account_ID 879 = 4
Account_ID 881 = 1
Now when I try to add another column to my final select statement Reflection_Count
which represents the total count of distinct reflections a user has completed my data for my weight log users change and is wrong.
WITH USER_INFO AS
(
SELECT
ID AS ACCOUNT_ID,
STATUS,
CREATED_AT AS REGISTRATION_DATE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT
WHERE
STATUS IS NULL
),
WEIGHT_LOG AS
(
SELECT
wl.ID AS WEIGHT_LOG_ID,
wl.ACCOUNT_ID,
wl.WEIGHT,
wl.CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
LEFT JOIN
USER_INFO ui ON wl.ACCOUNT_ID = ui.ACCOUNT_ID
WHERE
wl.CREATED_AT > ui.REGISTRATION_DATE
),
REFLECTIONS AS
(
WITH REFLECTION_PROGRESS_LOG AS
(
SELECT
ACCOUNT_ID,
ID AS REFLECTION_PROGRESS_ID,
EXTERNAL_REFLECTION_ID,
CREATED_AT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION_PROGRESS
ORDER BY
ACCOUNT_ID
),
REFLECTION_DETAILS AS
(
SELECT
ID AS REFLECTION_ID,
EXTERNAL_ID,
TITLE AS REFLECTION_TITLE,
LANGUAGE
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION
WHERE
LANGUAGE = 'en'
)
SELECT
rpl.ACCOUNT_ID,
rpl.REFLECTION_PROGRESS_ID,
rpl.EXTERNAL_REFLECTION_ID,
r.REFLECTION_TITLE,
rpl.CREATED_AT
FROM
REFLECTION_PROGRESS_LOG rpl
LEFT JOIN
REFLECTION_DETAILS r ON rpl.EXTERNAL_REFLECTION_ID = r.EXTERNAL_ID
ORDER BY
ACCOUNT_ID,
REFLECTION_PROGRESS_ID
)
SELECT
ui.ACCOUNT_ID,
COUNT(wl.WEIGHT_LOG_ID) AS WEIGHT_LOG_COUNT,
COUNT(DISTINCT r.EXTERNAL_REFLECTION_ID) AS REFLECTION_COUNT
FROM
USER_INFO ui
LEFT JOIN
WEIGHT_LOG wl ON ui.ACCOUNT_ID = wl.ACCOUNT_ID
LEFT JOIN
REFLECTIONS r ON ui.ACCOUNT_ID = r.ACCOUNT_ID
GROUP BY
ui.ACCOUNT_ID
ORDER BY
ui.ACCOUNT_ID;
Post_Join Table View
Account_ID 879 = 12
Account_ID 881 = 1
I can't seem to locate the issue and would appreciate any insights.
Thanks
Share Improve this question edited Mar 7 at 9:30 marc_s 756k184 gold badges1.4k silver badges1.5k bronze badges asked Mar 7 at 9:16 PaulPaul 173 bronze badges 2- 1 I'd try GROUP BY before joining. – jarlh Commented Mar 7 at 10:15
- Please add sample data for all relevant tables, as well as table definitions – Charlieface Commented Mar 7 at 11:13
1 Answer
Reset to default 0The problem is that you are grouping after the join, which means you get a cartesian product between each of the one-to-many tables. You can only group after a join if you are joining a single one-to-many table.
Instead group before joining.
Note also:
LEFT JOIN
where you are not filtering or selecting from that table can be completely elided.COUNT(nonNullableColumn)
andCOUNT(*)
are the same thing.- Many of the CTEs can be elided.
- The
WEIGHT_LOG
CTE uses a join back toUSER_INFO
. This can instead be turned into aLATERAL JOIN
orAPPLY
(depending on DBMS).
WITH REFLECTIONS AS
(
SELECT
rpl.ACCOUNT_ID,
COUNT(*) AS REFLECTION_COUNT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.REFLECTION_PROGRESS rpl
GROUP BY
rpl.ACCOUNT_ID
)
SELECT
ui.ACCOUNT_ID,
wl.WEIGHT_LOG_COUNT,
rpl.REFLECTION_COUNT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.ACCOUNT ui
LEFT JOIN LATERAL
(
SELECT
wl.ACCOUNT_ID,
COUNT(*) AS WEIGHT_LOG_COUNT
FROM
LOOPCARE_SOURCE_DATABASE.LOM_DB_US_PUBLIC.WEIGHT_LOG wl
WHERE
ui.ACCOUNT_ID = wl.ACCOUNT_ID
AND wl.CREATED_AT > ui.REGISTRATION_DATE
GROUP BY
wl.ACCOUNT_ID
) wl ON 1=1
LEFT JOIN
REFLECTIONS r ON ui.ACCOUNT_ID = r.ACCOUNT_ID
WHERE
ui.STATUS IS NULL
ORDER BY
ui.ACCOUNT_ID;
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1744939730a4602248.html
评论列表(0条)