# 1. Mosaic plot of Sexe by Service ----
library(ggplot2)
library(ggmosaic)

# Alternative kept for reference: bar chart of Sexe, one panel per Service.
# ggplot(knime.in, aes(Sexe, fill = Sexe)) +
#   geom_bar() +
#   facet_wrap(~ Service) +
#   theme_classic()

ggplot(knime.in, aes(Sexe, fill = Sexe)) +
  geom_mosaic(aes(x = product(Sexe, Service), fill = Sexe)) +
  theme_classic() +
  # `expand` is a logical flag; FALSE removes the padding around the panel.
  coord_cartesian(expand = FALSE) +
  # theme(axis.text.y = element_blank(),
  #       axis.ticks.y = element_blank()) +
  labs(
    title = "Repartition des effectifs par Sexe et Service",
    x = "",
    y = ""
  ) +
  scale_y_productlist()

# 2. Bar chart: row count per Sexe ----
library(ggplot2)

ggplot(knime.in, aes(Sexe, fill = Sexe)) +
  geom_bar() +
  theme_classic() +
  labs(title = "Nombre d'employés par Sexe")

# 3. One-sample proportion test on Sexe ----
# The input table is passed through unchanged; the test result is printed.
knime.out <- knime.in

# H0: the two Sexe categories are equally frequent (proportion = 0.5).
prop.test(
  table(knime.in$Sexe),
  p = 0.5,
  alternative = "two.sided",
  conf.level = 0.95,
  correct = TRUE
)

# 4. Chi-squared test of independence ----
knime.out <- knime.in

# H0: Sexe and Service are independent.
chisq.test(table(knime.in$Sexe, knime.in$Service))

# 5. Box plots of base monthly salary by Sexe, one panel per Service ----
library(ggplot2)
library(tidyverse)

# Alternative kept for reference: dodged salary histogram on a Service x Sexe
# grid.
# ggplot(knime.in, aes(`Salaire base mensuel`)) +
#   geom_histogram(aes(fill = Sexe), color = 'black', alpha = 0.5,
#                  position = 'dodge', bins = 6) +
#   facet_grid(Service ~ Sexe) +
#   theme_classic()
#   # + theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplot(knime.in, aes(Sexe, `Salaire base mensuel`)) +
  # varwidth = TRUE scales each box width with the group size.
  geom_boxplot(aes(fill = Sexe), varwidth = TRUE) +
  # Overlay the group mean on top of each box.
  stat_summary(fun = mean) +
  facet_wrap(~ Service) +
  theme_classic() +
  labs(title = "Distribution des salaires par sexe et service")

# 6. Bar chart of mean base monthly salary per Sexe, with value labels ----
library(ggplot2)
library(tidyverse)

# Alternative kept for reference: dodged salary histogram on a Service x Sexe
# grid.
# ggplot(knime.in, aes(`Salaire base mensuel`)) +
#   geom_histogram(aes(fill = Sexe), color = 'black', alpha = 0.5,
#                  position = 'dodge', bins = 6) +
#   facet_grid(Service ~ Sexe) +
#   theme_classic()
#   # + theme(axis.text.x = element_text(angle = 90, hjust = 1))

ggplot(knime.in, aes(Sexe, `Salaire base mensuel`)) +
  stat_summary(geom = "bar", fun = mean, aes(fill = Sexe)) +
  # after_stat(y) is the computed mean; it replaces the deprecated `..y..`.
  stat_summary(
    geom = "text",
    fun = mean,
    aes(label = round(after_stat(y))),
    vjust = -0.5,
    size = 6
  ) +
  theme_classic()

# 7. ----
# Box plots of age by Sexe, one panel per Service.
# Age is approximated as the current calendar year minus the birth year.
library(ggplot2)
library(lubridate)

ggplot(knime.in, aes(x = Sexe, y = year(today()) - year(Date_naissance))) +
  geom_boxplot(aes(color = Sexe)) +
  geom_jitter(aes(color = Sexe)) +
  # Overlay the group mean.
  stat_summary(fun = mean) +
  facet_wrap(~ Service) +
  labs(title = "Distribution des ages par service", y = "Age") +
  theme_classic()

# 8. Box plots of age by Sexe (all Services pooled) ----
library(tidyverse)
library(lubridate)

# The reference year is taken from today() rather than a hard-coded literal,
# so the ages stay current and match the other age computations in this file.
ggplot(
  knime.in,
  aes(x = Sexe, y = year(today()) - year(Date_naissance), color = Sexe)
) +
  geom_boxplot() +
  geom_jitter() +
  stat_summary(fun = mean, color = "black") +
  theme_classic() +
  labs(y = "Age")

# 9. Bar chart: row count per Sexe, split by Contrat ----
library(ggplot2)

ggplot(knime.in, aes(Sexe, fill = Contrat)) +
  geom_bar(position = position_dodge(width = 0.5)) +
  scale_fill_viridis_d(option = "plasma") +
  theme_classic() +
  labs(title = "Nombre de personnes par sexe et type de contrat", y = "") +
  # `expand` is a logical flag; FALSE removes the padding around the panel.
  coord_cartesian(expand = FALSE, clip = "off")

# 10. Test of equal proportions: Contrat by Sexe ----
# The input table is passed through unchanged; the test result is printed.
knime.out <- knime.in

# H0: the difference in proportions is 0.
# NOTE(review): prop.test() accepts a two-dimensional table only when it has
# exactly 2 columns -- confirm Contrat has exactly two categories.
prop.test(
  table(knime.in$Sexe, knime.in$Contrat),
  alternative = "two.sided",
  conf.level = 0.95
)

# 11. t-test of age by Sexe ----
library(dplyr)
library(lubridate)

knime.out <- knime.in

# Age is derived on the fly; `data = .` hands the piped table to t.test().
knime.out %>%
  mutate(Age = year(today()) - year(Date_naissance)) %>%
  t.test(Age ~ Sexe, data = .)

# 12. Bar chart: promotions per Sexe ----
library(ggplot2)

# Alternative kept for reference: sum of Promotion per Sexe.
# ggplot(knime.in, aes(Sexe, Promotion, fill = Sexe)) +
#   stat_summary(geom = 'bar', fun = sum) +
#   theme_classic()

# NOTE(review): the labels c("Non", "Oui") are applied to the sorted distinct
# values of Promotion -- presumably 0/1; confirm against the input table.
ggplot(
  knime.in,
  aes(Sexe, fill = factor(Promotion, labels = c("Non", "Oui")))
) +
  geom_bar(position = "dodge", color = "black") +
  scale_fill_viridis_d("Promotion") +
  labs(title = "Repartition des promotions par sexe", y = "N. personnes") +
  coord_cartesian(expand = FALSE) +
  theme_classic()

# 13. ----
# Box plots of seniority (Ancienneté_an) by Sexe.
library(tidyverse)

# Alternative kept for reference: histogram per Service, with the group mean
# drawn as a dotted vertical line.
# ggplot(knime.in, aes(Ancienneté_an, fill = Sexe)) +
#   geom_histogram(binwidth = 5, color = "white") +
#   facet_grid(Service ~ Sexe) +
#   geom_vline(
#     data = knime.in %>%
#       group_by(Service, Sexe) %>%
#       summarise(moy = mean(Ancienneté_an)),
#     aes(xintercept = moy), linetype = 'dotted'
#   ) +
#   theme_minimal()

# Alternative kept for reference: pooled histogram.
# ggplot(knime.in, aes(Ancienneté_an, fill = Sexe)) +
#   geom_histogram(binwidth = 5, color = "white") +
#   theme_classic() +
#   labs(title = "Repartition des effectifs selon ancienneté",
#        x = "Ancienneté", y = "Nombre de personnes")

ggplot(knime.in, aes(x = Sexe, y = Ancienneté_an, color = Sexe)) +
  geom_boxplot() +
  stat_summary(fun = mean, color = "black") +
  geom_jitter()

# 14. Augmentation by Sexe: proportion test, chi-squared test, row shares ----
# The input table is passed through unchanged; the results are printed.
knime.out <- knime.in

# Build the contingency table once and reuse it for all three calls.
sexe_augmentation <- table(knime.in$Sexe, knime.in$Augmentation)

# H0: the difference in proportions is 0.
prop.test(
  sexe_augmentation,
  alternative = "two.sided",
  conf.level = 0.95,
  correct = TRUE
)
chisq.test(sexe_augmentation)
# Margin 1: proportions are computed within each row (each Sexe).
prop.table(sexe_augmentation, 1)

# 15. Bar chart: Work_accident counts per Sexe ----
library(ggplot2)

ggplot(knime.in) +
  geom_bar(aes(fill = Work_accident, x = Sexe), position = "dodge") +
  labs(
    title = "Repartition des accident de travail par sexe",
    x = "Sexe",
    y = "Nombre de personnes"
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  # `expand` is a logical flag; FALSE removes the padding around the panel.
  coord_cartesian(expand = FALSE)

# 16. Test of equal proportions: Work_accident by Sexe ----
knime.out <- knime.in

# H0: the Work_accident proportion is the same for every Sexe category
# (a two-sample comparison, not a test against a fixed value of 0.5).
prop.test(
  table(knime.out$Sexe, knime.out$Work_accident),
  alternative = "two.sided",
  conf.level = 0.95,
  correct = TRUE
)

# 17. Anonymisation of the employee table ----
library(dplyr)
library(lubridate)

knime.out <- knime.in

# PII columns: id, first/last name, telephone.

# Replace the employee id with a random permutation of 1..nrow, sort the rows
# by that new id and use it as the row names.
knime.out[["id_salarié"]] <- sample.int(nrow(knime.out))
knime.out <- knime.out[order(knime.out[["id_salarié"]]), ]
row.names(knime.out) <- knime.out[["id_salarié"]]
# NOTE(review): this drops the FIRST column by position -- presumably
# `id_salarié`; confirm the column order of the input table.
knime.out[, 1] <- NULL

# Drop the telephone and first/last name columns.
knime.out <- knime.out %>%
  select(-c(`Prénom/Nom`, Telephone))

# age, salary, distance, satisfaction, children
# Binning: numeric columns are replaced by quantile-based intervals.

#' Cut a numeric vector into quantile-based intervals
#'
#' Break points are the quantiles of `x` at probabilities 0, 1/n, ..., 1.
#' Intervals are closed on the left; the last one is also closed on the right.
#'
#' @param x A numeric vector. `NA` values are ignored when computing the
#'   break points and stay `NA` in the result.
#' @param n Number of quantile groups requested. Fewer intervals are returned
#'   when ties make several quantiles equal.
#' @return A factor the same length as `x`.
apply_quantiles <- function(x, n = 4) {
  # na.rm = TRUE: quantile() stops on missing values otherwise.
  # unique(): cut() stops with "'breaks' are not unique" when tied data make
  # two quantiles coincide.
  breaks <- unique(
    quantile(x, probs = seq(0, 1, by = 1 / n), na.rm = TRUE, names = FALSE)
  )

  # Constant (or all-NA) input leaves fewer than two usable break points;
  # cut() cannot build an interval from that, so return a single bin.
  if (length(breaks) < 2L || anyNA(breaks)) {
    value <- formatC(breaks[1L], digits = 4, width = 1L)
    bins <- ifelse(
      is.na(x),
      NA_character_,
      paste0("[", value, ",", value, "]")
    )
    return(factor(bins))
  }

  cut(
    x,
    breaks = breaks,
    right = FALSE,
    include.lowest = TRUE,
    dig.lab = 4
  )
}

# Derive Age (current calendar year minus birth year), bin the numeric
# columns, collapse Enfants and `Durée hebdo` to two categories each, then
# drop the birth date.
knime.out <- knime.out %>%
  mutate(Age = year(today()) - year(Date_naissance)) %>%
  mutate(
    Age = apply_quantiles(Age, n = 3),
    `Salaire base mensuel` = apply_quantiles(`Salaire base mensuel`),
    `Distance domicile/Travail` = apply_quantiles(
      `Distance domicile/Travail`,
      3
    ),
    Ancienneté_an = apply_quantiles(Ancienneté_an, 3),
    `Niveau de satisfaction` = apply_quantiles(
      `Niveau de satisfaction`,
      n = 2
    ),
    Enfants = if_else(Enfants > 0, "Oui", "Non"),
    `Durée hebdo` = if_else(`Durée hebdo` >= 35, "temps plein", "mi-temps")
  ) %>%
  select(-c("Date_naissance"))