lubridate icon indicating copy to clipboard operation
lubridate copied to clipboard

Spanish month labels

Open dominicroye opened this issue 4 years ago • 11 comments

When I use a Spanish or French locale, the abbreviated month labels are displayed as "ene\.", but they should be just "ene.". I'm using Windows 10.

> library(lubridate)
> Sys.getlocale("LC_TIME")
## [1] "Spanish_Spain.1252"

> dt <- seq(ymd("2018-01-01"), ymd("2018-12-31"), "day")

> head(month(dt, label = TRUE))
## [1] ene\\. ene\\. ene\\. ene\\. ene\\. ene\\.
## 12 Levels: ene\\. < feb\\. < mar\\. < abr\\. < may\\. < ... < dic\\.

> Sys.setlocale("LC_TIME", "French")
## [1] "French_France.1252"
> head(month(dt, label = TRUE))
## [1] janv\\. janv\\. janv\\. janv\\. janv\\. janv\\.
## 12 Levels: janv\\. < févr\\. < mars < avr\\. < mai < juin < ... < déc\\.

> Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
> head(month(dt, label = TRUE))
## [1] Jan Jan Jan Jan Jan Jan
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec

dominicroye avatar Jul 20 '19 10:07 dominicroye

This appears to be because the month names are regular expressions; so we also need to store canonical names for output.

hadley avatar Nov 19 '19 23:11 hadley

This is surely a Windows only issue. Probably some regex bug indeed.

@dominicroye could you please provide the output of the following (with the locale replaced by your French and Spanish locale names)?

Sys.setlocale("LC_TIME", "es_ES.utf8")
format <- "%a@%A@%b@%B@%p@"
enc2utf8(unique(format(lubridate:::.date_template, format = format)))
##  [1] "jue@jueves@ene@enero@@"      "lun@lunes@feb@febrero@@"     "mar@martes@mar@marzo@@"     
##  [4] "dom@domingo@abr@abril@@"     "vie@viernes@may@mayo@@"      "mar@martes@jun@junio@@"     
##  [7] "vie@viernes@jul@julio@@"     "mié@miércoles@ago@agosto@@"  "mar@martes@sep@septiembre@@"
## [10] "vie@viernes@oct@octubre@@"   "mar@martes@nov@noviembre@@"  "sáb@sábado@dic@diciembre@@" 

Also the value of

str(.get_locale_regs("...your_locales..."))

vspinu avatar Dec 01 '19 20:12 vspinu

es_ES.utf8 doesn't exist on Windows.

Here is my output from your code:

SPANISH

> Sys.setlocale("LC_TIME", "Spanish_Spain.1252")
> format <- "%a@%A@%b@%B@%p@"
> enc2utf8(unique(format(lubridate:::.date_template, format = format)))
 [1] "ju.@jueves@ene.@enero@@"      "lu.@lunes@feb.@febrero@@"     "ma.@martes@mar.@marzo@@"     
 [4] "do.@domingo@abr.@abril@@"     "vi.@viernes@may.@mayo@@"      "ma.@martes@jun.@junio@@"     
 [7] "vi.@viernes@jul.@julio@@"     "mi.@miércoles@ago.@agosto@@"  "ma.@martes@sep.@septiembre@@"
[10] "vi.@viernes@oct.@octubre@@"   "ma.@martes@nov.@noviembre@@"  "sá.@sábado@dic.@diciembre@@" 

> str(lubridate:::.get_locale_regs("Spanish_Spain.1252"))
List of 6
 $ alpha_flex : Named chr [1:6] "((?<b_b>ene\\.|feb\\.|mar\\.|abr\\.|may\\.|jun\\.|jul\\.|ago\\.|sep\\.|oct\\.|nov\\.|dic\\.)|(?<B_b>enero|febre"| __truncated__ "(?<B_B>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)(?![[:alpha:]])" "((?<a_a>ju\\.|lu\\.|ma\\.|do\\.|vi\\.|mi\\.|sá\\.)|(?<A_a>jueves|lunes|martes|domingo|viernes|miércoles|sábado)"| __truncated__ "(?<A_A>jueves|lunes|martes|domingo|viernes|miércoles|sábado)(?![[:alpha:]])" ...
  ..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
 $ num_flex   : Named chr [1:24] "(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)" "(?<q>[0]?[1-4])(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" ...
  ..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
 $ alpha_exact: Named chr [1:6] "((?<b_b_e>ene\\.|feb\\.|mar\\.|abr\\.|may\\.|jun\\.|jul\\.|ago\\.|sep\\.|oct\\.|nov\\.|dic\\.)|(?<B_b_e>enero|f"| __truncated__ "(?<B_B_e>enero|febrero|marzo|abril|mayo|junio|julio|agosto|septiembre|octubre|noviembre|diciembre)(?![[:alpha:]])" "((?<a_a_e>ju\\.|lu\\.|ma\\.|do\\.|vi\\.|mi\\.|sá\\.)|(?<A_a_e>jueves|lunes|martes|domingo|viernes|miércoles|sáb"| __truncated__ "(?<A_A_e>jueves|lunes|martes|domingo|viernes|miércoles|sábado)(?![[:alpha:]])" ...
  ..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
 $ num_exact  : Named chr [1:24] "(?<d_e>[012][1-9]|3[01]|[12]0)" "(?<q_e>[0][1-4])" "(?<H_e>2[0-4]|[01]\\d)" "(?<H_e>2[0-4]|[01]\\d)" ...
  ..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
 $ wday_names :List of 2
  ..$ abr : chr [1:7] "do\\." "lu\\." "ma\\." "mi\\." ...
  ..$ full: chr [1:7] "domingo" "lunes" "martes" "miércoles" ...
 $ month_names:List of 2
  ..$ abr : chr [1:12] "ene\\." "feb\\." "mar\\." "abr\\." ...
  ..$ full: chr [1:12] "enero" "febrero" "marzo" "abril" ...

FRENCH

> Sys.setlocale("LC_TIME", "French_France.1252")
> format <- "%a@%A@%b@%B@%p@"
> enc2utf8(unique(format(lubridate:::.date_template, format = format)))
 [1] "jeu.@jeudi@janv.@janvier@@"    "lun.@lundi@févr.@février@@"    "mar.@mardi@mars@mars@@"       
 [4] "dim.@dimanche@avr.@avril@@"    "ven.@vendredi@mai@mai@@"       "mar.@mardi@juin@juin@@"       
 [7] "ven.@vendredi@juil.@juillet@@" "mer.@mercredi@août@août@@"     "mar.@mardi@sept.@septembre@@" 
[10] "ven.@vendredi@oct.@octobre@@"  "mar.@mardi@nov.@novembre@@"    "sam.@samedi@déc.@décembre@@"  

> str(lubridate:::.get_locale_regs("French_France.1252"))
List of 6
 $ alpha_flex : Named chr [1:6] "((?<b_b>janv\\.|févr\\.|mars|avr\\.|mai|juin|juil\\.|août|sept\\.|oct\\.|nov\\.|déc\\.)|(?<B_b>janvier|février|"| __truncated__ "(?<B_B>janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre)(?![[:alpha:]])" "((?<a_a>jeu\\.|lun\\.|mar\\.|dim\\.|ven\\.|mer\\.|sam\\.)|(?<A_a>jeudi|lundi|mardi|dimanche|vendredi|mercredi|s"| __truncated__ "(?<A_A>jeudi|lundi|mardi|dimanche|vendredi|mercredi|samedi)(?![[:alpha:]])" ...
  ..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
 $ num_flex   : Named chr [1:24] "(?<d>[012]?[1-9]|3[01]|[12]0)(?!\\d)" "(?<q>[0]?[1-4])(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" "(?<H>2[0-4]|[01]?\\d)(?!\\d)" ...
  ..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
 $ alpha_exact: Named chr [1:6] "((?<b_b_e>janv\\.|févr\\.|mars|avr\\.|mai|juin|juil\\.|août|sept\\.|oct\\.|nov\\.|déc\\.)|(?<B_b_e>janvier|févr"| __truncated__ "(?<B_B_e>janvier|février|mars|avril|mai|juin|juillet|août|septembre|octobre|novembre|décembre)(?![[:alpha:]])" "((?<a_a_e>jeu\\.|lun\\.|mar\\.|dim\\.|ven\\.|mer\\.|sam\\.)|(?<A_a_e>jeudi|lundi|mardi|dimanche|vendredi|mercre"| __truncated__ "(?<A_A_e>jeudi|lundi|mardi|dimanche|vendredi|mercredi|samedi)(?![[:alpha:]])" ...
  ..- attr(*, "names")= chr [1:6] "b" "B" "a" "A" ...
 $ num_exact  : Named chr [1:24] "(?<d_e>[012][1-9]|3[01]|[12]0)" "(?<q_e>[0][1-4])" "(?<H_e>2[0-4]|[01]\\d)" "(?<H_e>2[0-4]|[01]\\d)" ...
  ..- attr(*, "names")= chr [1:24] "d" "q" "H" "h" ...
 $ wday_names :List of 2
  ..$ abr : chr [1:7] "dim\\." "lun\\." "mar\\." "mer\\." ...
  ..$ full: chr [1:7] "dimanche" "lundi" "mardi" "mercredi" ...
 $ month_names:List of 2
  ..$ abr : chr [1:12] "janv\\." "févr\\." "mars" "avr\\." ...
  ..$ full: chr [1:12] "janvier" "février" "mars" "avril" ...

dominicroye avatar Dec 02 '19 12:12 dominicroye

Ok, so on Windows all the abbreviations come with dots at the end. Let me see what I can do.

vspinu avatar Dec 02 '19 15:12 vspinu

This should be fixed now. I would really appreciate it if you guys could try the dev version and let me know whether it works correctly.

vspinu avatar Mar 08 '20 14:03 vspinu

It is working correctly. Thank you!

dominicroye avatar Mar 09 '20 11:03 dominicroye

I have to reopen this issue since it is still happening with weekdays. I am sorry that I only noticed it now!

> library(lubridate)
> Sys.getlocale("LC_TIME")
[1] "Spanish_Spain.1252"

> dt <- seq(ymd("2018-01-01"), ymd("2018-12-31"), "day")

> head(wday(dt, label = TRUE))
[1] lu\\. ma\\. mi\\. ju\\. vi\\. sá\\.
Levels: do\\. < lu\\. < ma\\. < mi\\. < ju\\. < vi\\. < sá\\.

dominicroye avatar Dec 20 '20 12:12 dominicroye

I confirm this bug. However, the solution for the guess_formats (https://github.com/tidyverse/lubridate/commit/cc5f1a6de86863f983fd3f69ac842c31997a03a0) function works and can be easily implemented in .get_locale_regs which is what is used in the wday function.

It is necessary to replace this line (https://github.com/tidyverse/lubridate/blob/6f26b02de432cd9373ad4ce7766c36eacfc29918/R/guess.r#L311) with this:

  mat[] <- gsub("\\.$", "", mat) # remove abbrev trailing dot in some locales (#781)
  mat[] <- gsub("([].|(){^$*+?[])", "\\\\\\1", mat) # escaping meta chars

brianmsm avatar Mar 08 '21 00:03 brianmsm

I imagine this works correctly with clock, since we don't do anything with regular expressions:

library(clock)

dt <- seq(date_parse("2018-01-01"), date_parse("2018-12-31"), "day")

head(date_month_factor(dt, labels = "es", abbreviate = TRUE))
#> [1] ene. ene. ene. ene. ene. ene.
#> 12 Levels: ene. < feb. < mar. < abr. < may. < jun. < jul. < ago. < ... < dic.

head(date_weekday_factor(dt, labels = "es", abbreviate = TRUE))
#> [1] lun. mar. mié. jue. vie. sáb.
#> Levels: dom. < lun. < mar. < mié. < jue. < vie. < sáb.

If the labels aren't exactly what you expect, you can always create a custom clock_labels() object to use as the `labels` argument.

DavisVaughan avatar May 25 '21 13:05 DavisVaughan

Hi, I found that this bug is still alive for month abbreviations that differ from their English equivalents (jan != ene, apr != abr, aug != ago, dec != dic). The bug isn't OS-specific: I reproduced it on both Linux and Windows. Windows:

library(tidyverse)
library(lubridate)

test_dates <- tibble(abr_dates = c("ene-22", 
                                   "feb-22", 
                                   "mar-22", 
                                   "abr-22",
                                   "may-22",
                                   "jun-22",
                                   "jul-22",
                                   "ago-22",
                                   "sep-22", 
                                   "oct-22",
                                   "nov-22",
                                   "dic-22"))

test_dates %>% 
  mutate(dates_date = my(abr_dates))
#> Warning: There was 1 warning in `mutate()`.
#> ℹ In argument: `dates_date = my(abr_dates)`.
#> Caused by warning:
#> !  4 failed to parse.
#> # A tibble: 12 × 2
#>    abr_dates dates_date
#>    <chr>     <date>    
#>  1 ene-22    NA        
#>  2 feb-22    2022-02-01
#>  3 mar-22    2022-03-01
#>  4 abr-22    NA        
#>  5 may-22    2022-05-01
#>  6 jun-22    2022-06-01
#>  7 jul-22    2022-07-01
#>  8 ago-22    NA        
#>  9 sep-22    2022-09-01
#> 10 oct-22    2022-10-01
#> 11 nov-22    2022-11-01
#> 12 dic-22    NA

Created on 2023-06-11 with reprex v2.0.2

Session info
sessionInfo()
#> R version 4.3.0 (2023-04-21 ucrt)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 11 x64 (build 22621)
#> 
#> Matrix products: default
#> 
#> 
#> locale:
#> [1] LC_COLLATE=Spanish_Colombia.utf8  LC_CTYPE=Spanish_Colombia.utf8   
#> [3] LC_MONETARY=Spanish_Colombia.utf8 LC_NUMERIC=C                     
#> [5] LC_TIME=Spanish_Colombia.utf8    
#> 
#> time zone: America/Bogota
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.9.2 forcats_1.0.0   stringr_1.5.0   dplyr_1.1.2    
#>  [5] purrr_1.0.1     readr_2.1.4     tidyr_1.3.0     tibble_3.2.1   
#>  [9] ggplot2_3.4.2   tidyverse_2.0.0
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.3     compiler_4.3.0   reprex_2.0.2     tidyselect_1.2.0
#>  [5] scales_1.2.1     yaml_2.3.7       fastmap_1.1.1    R6_2.5.1        
#>  [9] generics_0.1.3   knitr_1.43       munsell_0.5.0    pillar_1.9.0    
#> [13] tzdb_0.4.0       rlang_1.1.1      utf8_1.2.3       stringi_1.7.12  
#> [17] xfun_0.39        fs_1.6.2         timechange_0.2.0 cli_3.6.1       
#> [21] withr_2.5.0      magrittr_2.0.3   digest_0.6.31    grid_4.3.0      
#> [25] rstudioapi_0.14  hms_1.1.3        lifecycle_1.0.3  vctrs_0.6.2     
#> [29] evaluate_0.21    glue_1.6.2       fansi_1.0.4      colorspace_2.1-0
#> [33] rmarkdown_2.22   tools_4.3.0      pkgconfig_2.0.3  htmltools_0.5.5

Linux:

library(reprex)
library(tidyverse)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

test_dates <- tibble(abr_dates = c("ene-22", 
                                   "feb-22", 
                                   "mar-22", 
                                   "abr-22",
                                   "may-22",
                                   "jun-22",
                                   "jul-22",
                                   "ago-22",
                                   "sep-22", 
                                   "oct-22",
                                   "nov-22",
                                   "dic-22"))

test_dates %>% 
  mutate(dates_date = my(abr_dates))
#> Warning: 4 failed to parse.
#> # A tibble: 12 × 2
#>    abr_dates dates_date
#>    <chr>     <date>    
#>  1 ene-22    NA        
#>  2 feb-22    2022-02-01
#>  3 mar-22    2022-03-01
#>  4 abr-22    NA        
#>  5 may-22    2022-05-01
#>  6 jun-22    2022-06-01
#>  7 jul-22    2022-07-01
#>  8 ago-22    NA        
#>  9 sep-22    2022-09-01
#> 10 oct-22    2022-10-01
#> 11 nov-22    2022-11-01
#> 12 dic-22    NA

Created on 2023-06-11 with reprex v2.0.2

Session info
sessionInfo()
#> R version 4.2.1 (2022-06-23)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Ubuntu 22.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=es_CO.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=es_CO.UTF-8        LC_COLLATE=es_CO.UTF-8    
#>  [5] LC_MONETARY=es_CO.UTF-8    LC_MESSAGES=es_CO.UTF-8   
#>  [7] LC_PAPER=es_CO.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=es_CO.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] lubridate_1.8.0 forcats_0.5.1   stringr_1.4.0   dplyr_1.0.9    
#>  [5] purrr_0.3.4     readr_2.1.2     tidyr_1.2.0     tibble_3.1.8   
#>  [9] ggplot2_3.4.0   tidyverse_1.3.1 reprex_2.0.2   
#> 
#> loaded via a namespace (and not attached):
#>  [1] styler_1.7.0      tidyselect_1.2.0  xfun_0.30         haven_2.5.0      
#>  [5] colorspace_2.0-3  vctrs_0.5.1       generics_0.1.2    htmltools_0.5.3  
#>  [9] yaml_2.3.5        utf8_1.2.2        rlang_1.0.6       R.oo_1.25.0      
#> [13] pillar_1.8.1      glue_1.6.2        withr_2.5.0       DBI_1.1.2        
#> [17] R.utils_2.12.0    dbplyr_2.2.1      readxl_1.4.0      modelr_0.1.8     
#> [21] R.cache_0.16.0    lifecycle_1.0.3   cellranger_1.1.0  munsell_0.5.0    
#> [25] gtable_0.3.0      rvest_1.0.2       R.methodsS3_1.8.2 evaluate_0.15    
#> [29] knitr_1.39        tzdb_0.3.0        fastmap_1.1.0     fansi_1.0.3      
#> [33] highr_0.9         broom_0.8.0       backports_1.4.1   scales_1.2.0     
#> [37] jsonlite_1.8.0    fs_1.5.2          hms_1.1.1         digest_0.6.29    
#> [41] stringi_1.7.6     grid_4.2.1        cli_3.4.1         tools_4.2.1      
#> [45] magrittr_2.0.3    crayon_1.5.1      pkgconfig_2.0.3   ellipsis_0.3.2   
#> [49] xml2_1.3.3        assertthat_0.2.1  rmarkdown_2.14    httr_1.4.2       
#> [53] rstudioapi_0.13   R6_2.5.1          compiler_4.2.1

augusto-umana avatar Jun 11 '23 18:06 augusto-umana

If you are on Linux, you need to install the locale.

https://orcacore.com/set-up-system-locale-ubuntu-22-04/

renatocava avatar Jan 27 '24 05:01 renatocava