lubridate
lubridate copied to clipboard
Spanish month labels
If I use a Spanish or French locale, the abbreviated month labels are displayed as "ene\.", but they should be just "ene.". I'm using Windows 10.
> library(lubridate)
> Sys.getlocale("LC_TIME")
## [1] "Spanish_Spain.1252"
> dt <- seq(ymd("2018-01-01"), ymd("2018-12-31"), "day")
> head(month(dt, label = TRUE))
## [1] ene\\. ene\\. ene\\. ene\\. ene\\. ene\\.
## 12 Levels: ene\\. < feb\\. < mar\\. < abr\\. < may\\. < ... < dic\\.
> Sys.setlocale("LC_TIME", "French")
## [1] "French_France.1252"
> head(month(dt, label = TRUE))
## [1] janv\\. janv\\. janv\\. janv\\. janv\\. janv\\.
## 12 Levels: janv\\. < févr\\. < mars < avr\\. < mai < juin < ... < déc\\.
> Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
> head(month(dt, label = TRUE))
## [1] Jan Jan Jan Jan Jan Jan
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
This appears to be because the month names are regular expressions; so we also need to store canonical names for output.
This is surely a Windows only issue. Probably some regex bug indeed.
@dominicroye could you please provide the output of the following (with the locale replaced by your French and Spanish locale names)?
Sys.setlocale("LC_TIME", "es_ES.utf8")
format <- "%a@%A@%b@%B@%p@"
enc2utf8(unique(format(lubridate:::.date_template, format = format)))
## [1] "jue@jueves@ene@enero@@" "lun@lunes@feb@febrero@@" "mar@martes@mar@marzo@@"
## [4] "dom@domingo@abr@abril@@" "vie@viernes@may@mayo@@" "mar@martes@jun@junio@@"
## [7] "vie@viernes@jul@julio@@" "mié@miércoles@ago@agosto@@" "mar@martes@sep@septiembre@@"
## [10] "vie@viernes@oct@octubre@@" "mar@martes@nov@noviembre@@" "sáb@sábado@dic@diciembre@@"
Also the value of
str(.get_locale_regs("...your_locales..."))
es_ES.utf8 doesn't exist in Windows.
Here is my output from your code:
SPANISH
> Sys.setlocale("LC_TIME", "Spanish_Spain.1252")
> format <- "%a@%A@%b@%B@%p@"
> enc2utf8(unique(format(lubridate:::.date_template, format = format)))
[1] "ju.@jueves@ene.@enero@@" "lu.@lunes@feb.@febrero@@" "ma.@martes@mar.@marzo@@"
[4] "do.@domingo@abr.@abril@@" "vi.@viernes@may.@mayo@@" "ma.@martes@jun.@junio@@"
[7] "vi.@viernes@jul.@julio@@" "mi.@miércoles@ago.@agosto@@" "ma.@martes@sep.@septiembre@@"
[10] "vi.@viernes@oct.@octubre@@" "ma.@martes@nov.@noviembre@@" "sá.@sábado@dic.@diciembre@@"
> str(lubridate:::.get_locale_regs("Spanish_Spain.1252"))
List of 6
$ alpha_flex : Named chr [1:6] "((?<b_b>ene\\.|feb\\.|mar\\.|abr\\.|may\\.|jun\\.|jul\\.|ago\\.|sep\\.|oct\\.|nov\\.|dic\\.)|(?<B_b>enero|febre"| __truncated__ "(?<B_B>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)(?![[:alpha:]])" "((?<a_a>ju\\.|lu\\.|ma\\.|do\\.|vi\\.|mi\\.|sá\\.)|(?<A_a>jueves|lunes|martes|domingo|viernes|miércoles|sábado)"| __truncated__ "(?<A_A>jueves|lunes|martes|domingo|viernes|miércoles|sábado)(?![[:alpha:]])" ...
..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
$ num_flex : Named chr [1:24] "(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)" "(?<q>[0]?[1-4])(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" ...
..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
$ alpha_exact: Named chr [1:6] "((?<b_b_e>ene\\.|feb\\.|mar\\.|abr\\.|may\\.|jun\\.|jul\\.|ago\\.|sep\\.|oct\\.|nov\\.|dic\\.)|(?<B_b_e>enero|f"| __truncated__ "(?<B_B_e>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)(?![[:alpha:]])" "((?<a_a_e>ju\\.|lu\\.|ma\\.|do\\.|vi\\.|mi\\.|sá\\.)|(?<A_a_e>jueves|lunes|martes|domingo|viernes|miércoles|sáb"| __truncated__ "(?<A_A_e>jueves|lunes|martes|domingo|viernes|miércoles|sábado)(?![[:alpha:]])" ...
..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
$ num_exact : Named chr [1:24] "(?<d_e>[012][1-9]|3[01]|[12]0)" "(?<q_e>[0][1-4])" "(?<H_e>2[0-4]|[01]\\d)" "(?<H_e>2[0-4]|[01]\\d)" ...
..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
$ wday_names :List of 2
..$ abr : chr [1:7] "do\\." "lu\\." "ma\\." "mi\\." ...
..$ full: chr [1:7] "domingo" "lunes" "martes" "miércoles" ...
$ month_names:List of 2
..$ abr : chr [1:12] "ene\\." "feb\\." "mar\\." "abr\\." ...
..$ full: chr [1:12] "enero" "febrero" "marzo" "abril" ...
FRENCH
> Sys.setlocale("LC_TIME", "French_France.1252")
> format <- "%a@%A@%b@%B@%p@"
> enc2utf8(unique(format(lubridate:::.date_template, format = format)))
[1] "jeu.@jeudi@janv.@janvier@@" "lun.@lundi@févr.@février@@" "mar.@mardi@mars@mars@@"
[4] "dim.@dimanche@avr.@avril@@" "ven.@vendredi@mai@mai@@" "mar.@mardi@juin@juin@@"
[7] "ven.@vendredi@juil.@juillet@@" "mer.@mercredi@août@août@@" "mar.@mardi@sept.@septembre@@"
[10] "ven.@vendredi@oct.@octobre@@" "mar.@mardi@nov.@novembre@@" "sam.@samedi@déc.@décembre@@"
> str(lubridate:::.get_locale_regs("French_France.1252"))
List of 6
$ alpha_flex : Named chr [1:6] "((?<b_b>janv\\.|févr\\.|mars|avr\\.|mai|juin|juil\\.|août|sept\\.|oct\\.|nov\\.|déc\\.)|(?<B_b>janvier|février|"| __truncated__ "(?<B_B>janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre)(?![[:alpha:]])" "((?<a_a>jeu\\.|lun\\.|mar\\.|dim\\.|ven\\.|mer\\.|sam\\.)|(?<A_a>jeudi|lundi|mardi|dimanche|vendredi|mercredi|s"| __truncated__ "(?<A_A>jeudi|lundi|mardi|dimanche|vendredi|mercredi|samedi)(?![[:alpha:]])" ...
..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
$ num_flex : Named chr [1:24] "(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)" "(?<q>[0]?[1-4])(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" ...
..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
$ alpha_exact: Named chr [1:6] "((?<b_b_e>janv\\.|févr\\.|mars|avr\\.|mai|juin|juil\\.|août|sept\\.|oct\\.|nov\\.|déc\\.)|(?<B_b_e>janvier|févr"| __truncated__ "(?<B_B_e>janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre)(?![[:alpha:]])" "((?<a_a_e>jeu\\.|lun\\.|mar\\.|dim\\.|ven\\.|mer\\.|sam\\.)|(?<A_a_e>jeudi|lundi|mardi|dimanche|vendredi|mercre"| __truncated__ "(?<A_A_e>jeudi|lundi|mardi|dimanche|vendredi|mercredi|samedi)(?![[:alpha:]])" ...
..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
$ num_exact : Named chr [1:24] "(?<d_e>[012][1-9]|3[01]|[12]0)" "(?<q_e>[0][1-4])" "(?<H_e>2[0-4]|[01]\\d)" "(?<H_e>2[0-4]|[01]\\d)" ...
..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
$ wday_names :List of 2
..$ abr : chr [1:7] "dim\\." "lun\\." "mar\\." "mer\\." ...
..$ full: chr [1:7] "dimanche" "lundi" "mardi" "mercredi" ...
$ month_names:List of 2
..$ abr : chr [1:12] "janv\\." "févr\\." "mars" "avr\\." ...
..$ full: chr [1:12] "janvier" "février" "mars" "avril" ...
Ok, so on Windows all the abbreviations come with dots at the end. Let me see what I can do.
Should have been fixed. Would really appreciate if you guys could try the dev version and let me know if it works correctly now.
It is working correctly. Thank you!
I have to reopen this issue since it is still happening with weekdays. I am sorry that I only noticed it now!
> library(lubridate)
> Sys.getlocale("LC_TIME")
[1] "Spanish_Spain.1252"
> dt <- seq(ymd("2018-01-01"), ymd("2018-12-31"), "day")
> head(wday(dt, label = TRUE))
[1] lu\\. ma\\. mi\\. ju\\. vi\\. sá\\.
Levels: do\\. < lu\\. < ma\\. < mi\\. < ju\\. < vi\\. < sá\\.
I confirm this bug. However, the solution for the guess_formats function (https://github.com/tidyverse/lubridate/commit/cc5f1a6de86863f983fd3f69ac842c31997a03a0) works and can easily be implemented in .get_locale_regs, which is what is used in the wday function.
It is necessary to replace this line (https://github.com/tidyverse/lubridate/blob/6f26b02de432cd9373ad4ce7766c36eacfc29918/R/guess.r#L311) with this:
mat[] <- gsub("\\.$", "", mat) # remove abbrev trailing dot in some locales (#781)
mat[] <- gsub("([].|(){^$*+?[])", "\\\\\\1", mat) # escaping meta chars
I imagine this works correctly with clock, since we don't do anything with regular expressions:
library(clock)
dt <- seq(date_parse("2018-01-01"), date_parse("2018-12-31"), "day")
head(date_month_factor(dt, labels = "es", abbreviate = TRUE))
#> [1] ene. ene. ene. ene. ene. ene.
#> 12 Levels: ene. < feb. < mar. < abr. < may. < jun. < jul. < ago. < ... < dic.
head(date_weekday_factor(dt, labels = "es", abbreviate = TRUE))
#> [1] lun. mar. mié. jue. vie. sáb.
#> Levels: dom. < lun. < mar. < mié. < jue. < vie. < sáb.
If the labels aren't exactly what you expect, you can always create a custom clock_labels() object to use as the labels argument.
Hi, I found that this bug is still alive for month abbreviations that differ from their English equivalents (jan != ene, apr != abr, aug != ago, dec != dic). The bug isn't OS-specific: I reproduced it on both Linux and Windows. Windows:
library(tidyverse)
library(lubridate)
test_dates <- tibble(abr_dates = c("ene-22",
"feb-22",
"mar-22",
"abr-22",
"may-22",
"jun-22",
"jul-22",
"ago-22",
"sep-22",
"oct-22",
"nov-22",
"dic-22"))
test_dates %>%
mutate(dates_date = my(abr_dates))
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `dates_date = my(abr_dates)`.
#> Caused by warning:
#> ! 4 failed to parse.
#> # A tibble: 12 × 2
#> abr_dates dates_date
#> <chr> <date>
#> 1 ene-22 NA
#> 2 feb-22 2022-02-01
#> 3 mar-22 2022-03-01
#> 4 abr-22 NA
#> 5 may-22 2022-05-01
#> 6 jun-22 2022-06-01
#> 7 jul-22 2022-07-01
#> 8 ago-22 NA
#> 9 sep-22 2022-09-01
#> 10 oct-22 2022-10-01
#> 11 nov-22 2022-11-01
#> 12 dic-22 NA
Created on 2023-06-11 with reprex v2.0.2
Session info
sessionInfo()
#> R version 4.3.0 (2023-04-21 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 11 x64 (build 22621)
#>
#> Matrix products: default
#>
#>
#> locale:
#> [1] LC_COLLATE=Spanish_Colombia.utf8 LC_CTYPE=Spanish_Colombia.utf8
#> [3] LC_MONETARY=Spanish_Colombia.utf8 LC_NUMERIC=C
#> [5] LC_TIME=Spanish_Colombia.utf8
#>
#> time zone: America/Bogota
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2
#> [5] purrr_1.0.1 readr_2.1.4 tidyr_1.3.0 tibble_3.2.1
#> [9] ggplot2_3.4.2 tidyverse_2.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] gtable_0.3.3 compiler_4.3.0 reprex_2.0.2 tidyselect_1.2.0
#> [5] scales_1.2.1 yaml_2.3.7 fastmap_1.1.1 R6_2.5.1
#> [9] generics_0.1.3 knitr_1.43 munsell_0.5.0 pillar_1.9.0
#> [13] tzdb_0.4.0 rlang_1.1.1 utf8_1.2.3 stringi_1.7.12
#> [17] xfun_0.39 fs_1.6.2 timechange_0.2.0 cli_3.6.1
#> [21] withr_2.5.0 magrittr_2.0.3 digest_0.6.31 grid_4.3.0
#> [25] rstudioapi_0.14 hms_1.1.3 lifecycle_1.0.3 vctrs_0.6.2
#> [29] evaluate_0.21 glue_1.6.2 fansi_1.0.4 colorspace_2.1-0
#> [33] rmarkdown_2.22 tools_4.3.0 pkgconfig_2.0.3 htmltools_0.5.5
Linux:
library(reprex)
library(tidyverse)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
test_dates <- tibble(abr_dates = c("ene-22",
"feb-22",
"mar-22",
"abr-22",
"may-22",
"jun-22",
"jul-22",
"ago-22",
"sep-22",
"oct-22",
"nov-22",
"dic-22"))
test_dates %>%
mutate(dates_date = my(abr_dates))
#> Warning: 4 failed to parse.
#> # A tibble: 12 × 2
#> abr_dates dates_date
#> <chr> <date>
#> 1 ene-22 NA
#> 2 feb-22 2022-02-01
#> 3 mar-22 2022-03-01
#> 4 abr-22 NA
#> 5 may-22 2022-05-01
#> 6 jun-22 2022-06-01
#> 7 jul-22 2022-07-01
#> 8 ago-22 NA
#> 9 sep-22 2022-09-01
#> 10 oct-22 2022-10-01
#> 11 nov-22 2022-11-01
#> 12 dic-22 NA
Created on 2023-06-11 with reprex v2.0.2
Session info
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.2 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#>
#> locale:
#> [1] LC_CTYPE=es_CO.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=es_CO.UTF-8 LC_COLLATE=es_CO.UTF-8
#> [5] LC_MONETARY=es_CO.UTF-8 LC_MESSAGES=es_CO.UTF-8
#> [7] LC_PAPER=es_CO.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=es_CO.UTF-8 LC_IDENTIFICATION=C
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] lubridate_1.8.0 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
#> [5] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.8
#> [9] ggplot2_3.4.0 tidyverse_1.3.1 reprex_2.0.2
#>
#> loaded via a namespace (and not attached):
#> [1] styler_1.7.0 tidyselect_1.2.0 xfun_0.30 haven_2.5.0
#> [5] colorspace_2.0-3 vctrs_0.5.1 generics_0.1.2 htmltools_0.5.3
#> [9] yaml_2.3.5 utf8_1.2.2 rlang_1.0.6 R.oo_1.25.0
#> [13] pillar_1.8.1 glue_1.6.2 withr_2.5.0 DBI_1.1.2
#> [17] R.utils_2.12.0 dbplyr_2.2.1 readxl_1.4.0 modelr_0.1.8
#> [21] R.cache_0.16.0 lifecycle_1.0.3 cellranger_1.1.0 munsell_0.5.0
#> [25] gtable_0.3.0 rvest_1.0.2 R.methodsS3_1.8.2 evaluate_0.15
#> [29] knitr_1.39 tzdb_0.3.0 fastmap_1.1.0 fansi_1.0.3
#> [33] highr_0.9 broom_0.8.0 backports_1.4.1 scales_1.2.0
#> [37] jsonlite_1.8.0 fs_1.5.2 hms_1.1.1 digest_0.6.29
#> [41] stringi_1.7.6 grid_4.2.1 cli_3.4.1 tools_4.2.1
#> [45] magrittr_2.0.3 crayon_1.5.1 pkgconfig_2.0.3 ellipsis_0.3.2
#> [49] xml2_1.3.3 assertthat_0.2.1 rmarkdown_2.14 httr_1.4.2
#> [53] rstudioapi_0.13 R6_2.5.1 compiler_4.2.1
If you are on Linux, you need to install the locale.
https://orcacore.com/set-up-system-locale-ubuntu-22-04/