@comment{
  bellinghausen_using_random_2025: peer-reviewed journal article in
  Natural Hazards and Earth System Sciences, vol. 25, issue 3 (2025).
  Stored as an article entry with structured journal/volume/number/pages
  fields so the bibliography style does the formatting. Authors are
  separated by the keyword "and"; the doi field holds the bare DOI with
  no resolver prefix; percent signs in the abstract are escaped so the
  field is safe to typeset.
}
@article{bellinghausen_using_random_2025,
  author   = {Bellinghausen, Kai and H{\"u}nicke, Birgit and Zorita, Eduardo},
  title    = {Using random forests to forecast daily extreme sea level occurrences at the {Baltic} Coast},
  journal  = {Natural Hazards and Earth System Sciences},
  volume   = {25},
  number   = {3},
  pages    = {1139--1162},
  year     = {2025},
  doi      = {10.5194/nhess-25-1139-2025},
  abstract = {We have designed a machine learning method to predict the occurrence of daily extreme sea level at the Baltic Sea coast with lead times of a few days. The method is based on a random forest classifier. It uses spatially resolved fields of daily sea level pressure, surface wind, precipitation, and the pre-filling state of the Baltic Sea as predictors for daily sea level above the 95 \% quantile at each of seven tide gauge stations representative of the Baltic coast. The method is purely data-driven and is trained with sea level data from the Global Extreme Sea Level Analysis (GESLA) dataset and from the meteorological reanalysis ERA5 of the European Centre for Medium-Range Weather Forecasts (ECMWF). Sea level extremes at lead times of up to 3 d are satisfactorily predicted by the method, and the relevant predictor and predictor regions are identified. The sensitivity, measured as the proportion of correctly predicted extremes, is, depending on the stations, on the order of 70 \%. The precision of the model is typically around 25 \% and, for some instances, higher. For lead times longer than 3 d, the predictive skill degrades; for 7 d, it is comparable to a random skill. The sensitivity of our model is higher than the one derived from a storm surge reanalysis with dynamical models that use available information of the predictors without any time lag, as done by Muis et al. (2016), but its precision is considerably lower. The importance of each predictor depends on the location of the tide gauge. Usually, the most relevant predictors are sea level pressure, surface wind, and pre-filling.
Extreme sea levels at the meridionally oriented coastlines of the Baltic Sea are better predicted by meridional winds and surface pressure. In contrast, for stations located at zonally oriented coastlines, the most relevant predictors are surface pressure and the zonal wind component. Precipitation did not display consistent patterns or a high relevance predictor for most of the stations analysed. The random forest classifier is not required to have considerable complexity, and the computing time to issue predictions is typically a few minutes on a personal laptop. The method can, therefore, be used as a pre-warning system to trigger the application of more sophisticated algorithms that estimate the height of the ensuing extreme sea level or as a warning to run larger ensembles with physically based numerical models.},
}