polars_compute/cast/
temporal.rs

1use arrow::array::{PrimitiveArray, Utf8ViewArray};
2use arrow::datatypes::{ArrowDataType, TimeUnit};
3use arrow::temporal_conversions::{parse_offset, parse_offset_tz};
4pub use arrow::temporal_conversions::{
5    EPOCH_DAYS_FROM_CE, MICROSECONDS, MICROSECONDS_IN_DAY, MILLISECONDS, MILLISECONDS_IN_DAY,
6    NANOSECONDS, NANOSECONDS_IN_DAY, SECONDS_IN_DAY,
7};
8use chrono::format::{Parsed, StrftimeItems};
9use polars_error::PolarsResult;
10use polars_utils::pl_str::PlSmallStr;
11
12/// Get the time unit as a multiple of a second
13pub const fn time_unit_multiple(unit: TimeUnit) -> i64 {
14    match unit {
15        TimeUnit::Second => 1,
16        TimeUnit::Millisecond => MILLISECONDS,
17        TimeUnit::Microsecond => MICROSECONDS,
18        TimeUnit::Nanosecond => NANOSECONDS,
19    }
20}
21
22fn chrono_tz_utf_to_timestamp(
23    array: &Utf8ViewArray,
24    fmt: &str,
25    time_zone: PlSmallStr,
26    time_unit: TimeUnit,
27) -> PolarsResult<PrimitiveArray<i64>> {
28    let tz = parse_offset_tz(time_zone.as_str())?;
29    Ok(utf8view_to_timestamp_impl(
30        array, fmt, time_zone, tz, time_unit,
31    ))
32}
33
34fn utf8view_to_timestamp_impl<T: chrono::TimeZone>(
35    array: &Utf8ViewArray,
36    fmt: &str,
37    time_zone: PlSmallStr,
38    tz: T,
39    time_unit: TimeUnit,
40) -> PrimitiveArray<i64> {
41    let iter = array
42        .iter()
43        .map(|x| x.and_then(|x| utf8_to_timestamp_scalar(x, fmt, &tz, &time_unit)));
44
45    PrimitiveArray::from_trusted_len_iter(iter)
46        .to(ArrowDataType::Timestamp(time_unit, Some(time_zone)))
47}
48
49/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp with timezone.
50///
51/// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`).
52/// Returns in scale `tz` of `TimeUnit`.
53#[inline]
54pub fn utf8_to_timestamp_scalar<T: chrono::TimeZone>(
55    value: &str,
56    fmt: &str,
57    tz: &T,
58    tu: &TimeUnit,
59) -> Option<i64> {
60    let mut parsed = Parsed::new();
61    let fmt = StrftimeItems::new(fmt);
62    let r = chrono::format::parse(&mut parsed, value, fmt).ok();
63    if r.is_some() {
64        parsed
65            .to_datetime()
66            .map(|x| x.naive_utc())
67            .map(|x| tz.from_utc_datetime(&x))
68            .map(|x| match tu {
69                TimeUnit::Second => x.timestamp(),
70                TimeUnit::Millisecond => x.timestamp_millis(),
71                TimeUnit::Microsecond => x.timestamp_micros(),
72                TimeUnit::Nanosecond => x.timestamp_nanos_opt().unwrap(),
73            })
74            .ok()
75    } else {
76        None
77    }
78}
79
80/// Parses a [`Utf8Array`] to a timeozone-aware timestamp, i.e. [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, Some(timezone))`.
81///
82/// # Implementation
83///
84/// * parsed values with timezone other than `timezone` are converted to `timezone`.
85/// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp`] to parse naive timezones.
86/// * Null elements remain null; non-parsable elements are null.
87///
88/// The feature `"chrono-tz"` enables IANA and zoneinfo formats for `timezone`.
89///
90/// # Error
91///
92/// This function errors iff `timezone` is not parsable to an offset.
93pub(crate) fn utf8view_to_timestamp(
94    array: &Utf8ViewArray,
95    fmt: &str,
96    time_zone: PlSmallStr,
97    time_unit: TimeUnit,
98) -> PolarsResult<PrimitiveArray<i64>> {
99    let tz = parse_offset(time_zone.as_str());
100
101    if let Ok(tz) = tz {
102        Ok(utf8view_to_timestamp_impl(
103            array, fmt, time_zone, tz, time_unit,
104        ))
105    } else {
106        chrono_tz_utf_to_timestamp(array, fmt, time_zone, time_unit)
107    }
108}
109
110/// Parses a [`Utf8Array`] to naive timestamp, i.e.
111/// [`PrimitiveArray<i64>`] with type `Timestamp(Nanosecond, None)`.
112/// Timezones are ignored.
113/// Null elements remain null; non-parsable elements are set to null.
114pub(crate) fn utf8view_to_naive_timestamp(
115    array: &Utf8ViewArray,
116    fmt: &str,
117    time_unit: TimeUnit,
118) -> PrimitiveArray<i64> {
119    let iter = array
120        .iter()
121        .map(|x| x.and_then(|x| utf8_to_naive_timestamp_scalar(x, fmt, &time_unit)));
122
123    PrimitiveArray::from_trusted_len_iter(iter).to(ArrowDataType::Timestamp(time_unit, None))
124}
125
126/// Parses `value` to `Option<i64>` consistent with the Arrow's definition of timestamp without timezone.
127/// Returns in scale `tz` of `TimeUnit`.
128#[inline]
129pub fn utf8_to_naive_timestamp_scalar(value: &str, fmt: &str, tu: &TimeUnit) -> Option<i64> {
130    let fmt = StrftimeItems::new(fmt);
131    let mut parsed = Parsed::new();
132    chrono::format::parse(&mut parsed, value, fmt.clone()).ok();
133    parsed
134        .to_naive_datetime_with_offset(0)
135        .map(|x| match tu {
136            TimeUnit::Second => x.and_utc().timestamp(),
137            TimeUnit::Millisecond => x.and_utc().timestamp_millis(),
138            TimeUnit::Microsecond => x.and_utc().timestamp_micros(),
139            TimeUnit::Nanosecond => x.and_utc().timestamp_nanos_opt().unwrap(),
140        })
141        .ok()
142}