如何在 Hive 查询中回溯 7 天
How to look back 7 days in a Hive query
我有一条 SQL 查询,需要持续回溯 4 天的数据。这段代码每周运行一次,因此我需要回溯 7 天。我的 WHERE 子句目前是写死的,只在两个固定日期之间查找,但我需要它始终回溯 7 天。
这是我的代码片段:
-- For each GPS fix of a trip, attach the fix ranked directly after it and derive
-- two distance estimates plus the elapsed time between the pair.
WITH latest_device_timestamp AS (
    -- Newest data_effective_timestamp per vehicle.
    SELECT
        vehicle_id,
        MAX(data_effective_timestamp) AS data_effective_timestamp
    FROM cms.devices
    GROUP BY vehicle_id
),
latest_device AS (
    -- Device rows stamped with that newest timestamp.
    -- DISTINCT is retained as-is; the original author questioned why duplicates appear.
    SELECT DISTINCT
        devices.vehicle_id,
        devices.vehicle_vin,
        devices.data_effective_timestamp
    FROM cms.devices devices
    INNER JOIN latest_device_timestamp max_data_effective
        ON devices.vehicle_id = max_data_effective.vehicle_id
        AND devices.data_effective_timestamp = max_data_effective.data_effective_timestamp
),
gps_traces AS (
    -- One row per GPS fix, ranked by timestamp within its trip.
    SELECT
        gtrips.trip_id,
        TO_DATE(gtrips.trip_date) AS trip_date,
        gtrips.fleet_id,
        vin.vehicle_vin,
        gtrips.driver_id,
        gtrips.trip_distance_travelled,
        gtrips.trip_duration,
        TO_TIMESTAMP(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") AS gps_timestamp,
        -- NOTE(review): RANK() gives tied timestamps the same rank and then skips a value,
        -- so fixes sharing a timestamp would not chain cleanly in the rank + 1 self-join below.
        RANK() OVER (
            PARTITION BY gtrips.trip_id
            ORDER BY TO_TIMESTAMP(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") ASC
        ) AS timestamp_rank,
        gdata.latitude,
        gdata.longitude,
        gdata.postcode
    FROM cms.gps_trips gtrips
    INNER JOIN cms.gps_data gdata
        ON gtrips.trip_id = gdata.trip_id
    -- NOTE(review): this join carries no ON clause, so nothing ties a device row to a trip.
    -- Presumably it should match on a vehicle identifier -- confirm against the table schema.
    INNER JOIN latest_device vin
    WHERE
        TO_DATE(gtrips.trip_date) >= "2020-12-11" -- fixed start date, kept for now
        AND TO_DATE(gtrips.trip_date) <= "2020-12-17" -- fixed end date
        AND gtrips.fleet_id = 10211 -- single fleet, kept only for this example
)
SELECT
    gps.trip_id,
    gps.trip_date,
    gps.fleet_id,
    gps.vehicle_vin,
    gps.driver_id,
    gps.trip_distance_travelled,
    gps.trip_duration,
    gps.gps_timestamp,
    gps.latitude,
    gps.longitude,
    gps.postcode,
    gps1.gps_timestamp AS next_timestamp,
    gps1.latitude AS next_latitude,
    gps1.longitude AS next_longitude,
    -- Spherical law of cosines between the fix and the next one.
    -- 3958.76 is presumably Earth's mean radius in miles -- confirm the intended unit.
    ACOS(
        SIN(RADIANS(gps.latitude)) * SIN(RADIANS(gps1.latitude)) +
        COS(RADIANS(gps.latitude)) * COS(RADIANS(gps1.latitude)) * COS(RADIANS(gps1.longitude) - RADIANS(gps.longitude))
    ) * 3958.76 AS COSINES_DISTANCE,
    -- Haversine formula over the same pair of points, scaled by the same constant.
    ASIN(
        SQRT(
            POWER(SIN((RADIANS(gps.latitude) - RADIANS(gps1.latitude)) / 2), 2) +
            COS(RADIANS(gps.latitude)) * COS(RADIANS(gps1.latitude)) *
            POWER(SIN((RADIANS(gps.longitude) - RADIANS(gps1.longitude)) / 2), 2)
        )
    ) * 3958.76 * 2 AS HAVERSINE_DISTANCE,
    -- Difference of the two UNIX_TIMESTAMP values (next fix minus this fix).
    (UNIX_TIMESTAMP(gps1.gps_timestamp) - UNIX_TIMESTAMP(gps.gps_timestamp)) AS GPS_INTERVAL
FROM gps_traces gps
-- LEFT JOIN keeps the last fix of each trip, whose "next" columns stay NULL.
LEFT JOIN gps_traces gps1
    ON gps.trip_id = gps1.trip_id
    AND gps1.timestamp_rank = gps.timestamp_rank + 1
ORDER BY
    gps.fleet_id,
    gps.trip_id,
    gps.timestamp_rank
具体来说,我需要在此处更改此代码段:
WHERE
    -- Both bounds are fixed literals; this is the filter that needs to become a rolling 7 days.
    to_date(gtrips.trip_date) >= "2020-12-11"
    AND to_date(gtrips.trip_date) <= "2020-12-17"
我尝试转换日期,但它在 Hive 中失败了。有人可以帮忙吗?
您可以使用 current_date:
WHERE
    -- Rolling window covering the 7 calendar days before the run date: [today - 7, today).
    -- The upper bound is exclusive: ">= today - 7" together with "<= today" would span
    -- 8 dates, so consecutive weekly runs would both include the run day.
    -- Example: a run on 2020-12-18 selects 2020-12-11 through 2020-12-17.
    to_date(gtrips.trip_date) >= date_sub(current_date, 7) -- inclusive start, 7 days back
    AND to_date(gtrips.trip_date) < current_date -- exclusive end, run date itself is left out
或者将当前日期作为 -hiveconf 参数传递:
WHERE
    -- Same rolling window, anchored on a date supplied by the caller through the
    -- hiveconf variable current_date, which is substituted into the query text.
    -- Window is [supplied date - 7, supplied date): the upper bound is exclusive, because
    -- ">= date - 7" together with "<= date" would span 8 dates and overlap between weekly runs.
    to_date(gtrips.trip_date) >= date_sub(to_date('${hiveconf:current_date}'), 7) -- inclusive start, 7 days back
    AND to_date(gtrips.trip_date) < to_date('${hiveconf:current_date}') -- exclusive end
我有一条 SQL 查询,需要持续回溯 4 天的数据。这段代码每周运行一次,因此我需要回溯 7 天。我的 WHERE 子句目前是写死的,只在两个固定日期之间查找,但我需要它始终回溯 7 天。
这是我的代码片段:
-- For each GPS fix of a trip, attach the fix ranked directly after it and derive
-- two distance estimates plus the elapsed time between the pair.
WITH latest_device_timestamp AS (
    -- Newest data_effective_timestamp per vehicle.
    SELECT
        vehicle_id,
        MAX(data_effective_timestamp) AS data_effective_timestamp
    FROM cms.devices
    GROUP BY vehicle_id
),
latest_device AS (
    -- Device rows stamped with that newest timestamp.
    -- DISTINCT is retained as-is; the original author questioned why duplicates appear.
    SELECT DISTINCT
        devices.vehicle_id,
        devices.vehicle_vin,
        devices.data_effective_timestamp
    FROM cms.devices devices
    INNER JOIN latest_device_timestamp max_data_effective
        ON devices.vehicle_id = max_data_effective.vehicle_id
        AND devices.data_effective_timestamp = max_data_effective.data_effective_timestamp
),
gps_traces AS (
    -- One row per GPS fix, ranked by timestamp within its trip.
    SELECT
        gtrips.trip_id,
        TO_DATE(gtrips.trip_date) AS trip_date,
        gtrips.fleet_id,
        vin.vehicle_vin,
        gtrips.driver_id,
        gtrips.trip_distance_travelled,
        gtrips.trip_duration,
        TO_TIMESTAMP(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") AS gps_timestamp,
        -- NOTE(review): RANK() gives tied timestamps the same rank and then skips a value,
        -- so fixes sharing a timestamp would not chain cleanly in the rank + 1 self-join below.
        RANK() OVER (
            PARTITION BY gtrips.trip_id
            ORDER BY TO_TIMESTAMP(gdata.trip_timestamp, "yyyy-MM-dd'T'HH:mm:ss") ASC
        ) AS timestamp_rank,
        gdata.latitude,
        gdata.longitude,
        gdata.postcode
    FROM cms.gps_trips gtrips
    INNER JOIN cms.gps_data gdata
        ON gtrips.trip_id = gdata.trip_id
    -- NOTE(review): this join carries no ON clause, so nothing ties a device row to a trip.
    -- Presumably it should match on a vehicle identifier -- confirm against the table schema.
    INNER JOIN latest_device vin
    WHERE
        TO_DATE(gtrips.trip_date) >= "2020-12-11" -- fixed start date, kept for now
        AND TO_DATE(gtrips.trip_date) <= "2020-12-17" -- fixed end date
        AND gtrips.fleet_id = 10211 -- single fleet, kept only for this example
)
SELECT
    gps.trip_id,
    gps.trip_date,
    gps.fleet_id,
    gps.vehicle_vin,
    gps.driver_id,
    gps.trip_distance_travelled,
    gps.trip_duration,
    gps.gps_timestamp,
    gps.latitude,
    gps.longitude,
    gps.postcode,
    gps1.gps_timestamp AS next_timestamp,
    gps1.latitude AS next_latitude,
    gps1.longitude AS next_longitude,
    -- Spherical law of cosines between the fix and the next one.
    -- 3958.76 is presumably Earth's mean radius in miles -- confirm the intended unit.
    ACOS(
        SIN(RADIANS(gps.latitude)) * SIN(RADIANS(gps1.latitude)) +
        COS(RADIANS(gps.latitude)) * COS(RADIANS(gps1.latitude)) * COS(RADIANS(gps1.longitude) - RADIANS(gps.longitude))
    ) * 3958.76 AS COSINES_DISTANCE,
    -- Haversine formula over the same pair of points, scaled by the same constant.
    ASIN(
        SQRT(
            POWER(SIN((RADIANS(gps.latitude) - RADIANS(gps1.latitude)) / 2), 2) +
            COS(RADIANS(gps.latitude)) * COS(RADIANS(gps1.latitude)) *
            POWER(SIN((RADIANS(gps.longitude) - RADIANS(gps1.longitude)) / 2), 2)
        )
    ) * 3958.76 * 2 AS HAVERSINE_DISTANCE,
    -- Difference of the two UNIX_TIMESTAMP values (next fix minus this fix).
    (UNIX_TIMESTAMP(gps1.gps_timestamp) - UNIX_TIMESTAMP(gps.gps_timestamp)) AS GPS_INTERVAL
FROM gps_traces gps
-- LEFT JOIN keeps the last fix of each trip, whose "next" columns stay NULL.
LEFT JOIN gps_traces gps1
    ON gps.trip_id = gps1.trip_id
    AND gps1.timestamp_rank = gps.timestamp_rank + 1
ORDER BY
    gps.fleet_id,
    gps.trip_id,
    gps.timestamp_rank
具体来说,我需要在此处更改此代码段:
WHERE
    -- Both bounds are fixed literals; this is the filter that needs to become a rolling 7 days.
    to_date(gtrips.trip_date) >= "2020-12-11"
    AND to_date(gtrips.trip_date) <= "2020-12-17"
我尝试转换日期,但它在 Hive 中失败了。有人可以帮忙吗?
您可以使用 current_date:
WHERE
    -- Rolling window covering the 7 calendar days before the run date: [today - 7, today).
    -- The upper bound is exclusive: ">= today - 7" together with "<= today" would span
    -- 8 dates, so consecutive weekly runs would both include the run day.
    -- Example: a run on 2020-12-18 selects 2020-12-11 through 2020-12-17.
    to_date(gtrips.trip_date) >= date_sub(current_date, 7) -- inclusive start, 7 days back
    AND to_date(gtrips.trip_date) < current_date -- exclusive end, run date itself is left out
或者将当前日期作为 -hiveconf 参数传递:
WHERE
    -- Same rolling window, anchored on a date supplied by the caller through the
    -- hiveconf variable current_date, which is substituted into the query text.
    -- Window is [supplied date - 7, supplied date): the upper bound is exclusive, because
    -- ">= date - 7" together with "<= date" would span 8 dates and overlap between weekly runs.
    to_date(gtrips.trip_date) >= date_sub(to_date('${hiveconf:current_date}'), 7) -- inclusive start, 7 days back
    AND to_date(gtrips.trip_date) < to_date('${hiveconf:current_date}') -- exclusive end