Register now to learn Fabric in free live sessions led by the best Microsoft experts. From Apr 16 to May 9, in English and Spanish.
Hello,
I have a python script that I've written for time series forecasting. Now I want to use it in power bi but I'm getting attached error:
Also you can find my python code below:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import matplotlib.dates as mdates
#import seaborn as sns
#from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
#from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import pmdarima as pm
#from numpy import cumsum
#import csv
#import pyodbc
from datetime import datetime
from dateutil.relativedelta import *
class DataPrep:
def __init__(self):
self.df = pd.DataFrame()
self.mega_projects = set()
self.mega_project_to_df = {}
self.mega_project_to_df_pvt = {}
def read_data(self):
self.df = pd.read_csv('C:/xx/Beyza/Downloads/xxxxxx.csv', parse_dates=['CreatedDate'])
self.df = self.df[['EstimateManDay', 'CreatedDate', 'MegaProject', 'ProjectName']]
self.df['month'] = pd.DatetimeIndex(self.df['CreatedDate']).month
self.df['year'] = pd.DatetimeIndex(self.df['CreatedDate']).year
self.df['quarter'] = pd.DatetimeIndex(self.df['CreatedDate']).quarter
self.df['week'] = pd.DatetimeIndex(self.df['CreatedDate']).week
self.df['dayorg'] = pd.DatetimeIndex(self.df['CreatedDate']).day
self.df['day'] = 1
self.df['year_quarter'] = self.df['year'].astype(str) + "_" + self.df[
'quarter'].astype(str)
self.df['year_month'] = self.df['year'].astype(str) + "_" + self.df[
'month'].astype(str)
self.df['year_week'] = self.df['year'].astype(str) + "_" + self.df['week'].astype(
str)
self.df['date'] = pd.to_datetime(self.df[['year', 'month', 'day']])
self.df = self.df[self.df['CreatedDate'] <= datetime.strptime("2020-01-01", "%Y-%m-%d")]
self.mega_projects = {project for project in self.df['MegaProject']}
for project in self.mega_projects:
self.mega_project_to_df[project] = self.df[self.df.MegaProject == project]
def pvt_table(self):
for project, df in self.mega_project_to_df.items():
df_pvt = pd.pivot_table(df, index='date', values='EstimateManDay', aggfunc='sum')
df_pvt = self.populate_rows(df_pvt)
self.mega_project_to_df_pvt[project] = df_pvt
def populate_rows(self,df_pvt):
df_pvt = df_pvt.sort_index()
prev_index = None
total_df = pd.DataFrame()
for index, row in df_pvt.iterrows():
if prev_index:
prev_index_datetime = prev_index.to_pydatetime()
index_datetime = index.to_pydatetime()
while prev_index_datetime + relativedelta(months=+1) != index_datetime:
prev_index_datetime = prev_index_datetime + relativedelta(months=+1)
populate_df = pd.DataFrame.from_dict({"date":[pd.Timestamp(prev_index_datetime)],"EstimateManDay":[0]})
total_df = total_df.append(populate_df)
prev_index = index
if total_df.size > 0:
total_df = total_df.set_index("date")
df_pvt = df_pvt.append(total_df)
df_pvt = df_pvt.sort_index()
return df_pvt
class AutoArima(DataPrep):
def __init__(self):
super().__init__()
self.test = {}
def adf_test(self):
self.read_data()
self.pvt_table()
for project, pivot_df in self.mega_project_to_df_pvt.items():
try:
adf_test = adfuller(pivot_df['EstimateManDay'])
except ValueError as err:
continue
test_statistics = adf_test[0]
p_value = adf_test[1]
if p_value <= 0.05:
print("p_value:", adf_test[1])
print("Test Statistics:", adf_test[0])
arima_dict = self.arimaopt_values(pivot_df)
model_results, mean_abs_err = self.model_fitting(pivot_df, arima_dict['order'],
arima_dict['seasonal_order'])
diff_forecast = self.forecast(model_results, False, None)
self.writer(diff_forecast, project)
fig, ax = plt.subplots(figsize=(20, 10))
plt.plot(pivot_df, label='observed', color='blue')
plt.plot(diff_forecast, label='forecast', color='red')
plt.legend()
plt.show()
else:
try:
decomps = self.decomposition_df(pivot_df)
except ValueError as err:
continue
arima_dict = self.arimaopt_values(decomps)
model_results, mean_abs_err = self.model_fitting(decomps, arima_dict['order'],
arima_dict['seasonal_order'])
diff_forecast = self.forecast(model_results, True, decomps)
self.writer(diff_forecast, project)
fig, ax = plt.subplots(figsize=(20, 10))
plt.plot(pivot_df, label='observed', color='blue')
plt.plot(diff_forecast, label='forecast', color='red')
plt.legend()
plt.show()
def decomposition_df(self, pivot_df):
decomps = seasonal_decompose(pivot_df['EstimateManDay'], period=12)
return decomps
def arimaopt_values(self, pivot_df):
auto_arima = pm.auto_arima(pivot_df,
d=0,
start_p=1,
start_1=1,
max_p=3,
max_q=3,
seasonal=True,
m=12,
D=1,
supress_warnings=True,
trace=True,
error_action='ignore',
stepwise=True)
auto_arima.summary()
auto_arima_dict = auto_arima.to_dict()
response_dict = {'seasonal_order': auto_arima_dict['seasonal_order'], 'order': auto_arima_dict['order']}
return response_dict
def model_fitting(self, pvt_table, order, seasonal_order):
model = SARIMAX(pvt_table, order=order, seasonal_order=seasonal_order)
results = model.fit()
residuals = results.resid
mean_abs_err = np.mean(np.abs(residuals))
return results, mean_abs_err
def forecast(self, model_results, is_high_pvalue, decomps):
diff_forecast = model_results.get_forecast(steps=12).predicted_mean
if is_high_pvalue:
diff_forecast = np.cumsum(diff_forecast) + decomps.iloc[-1, 0]
diff_forecast = pd.DataFrame(diff_forecast)
diff_forecast.columns = ['EstimateManDay']
return diff_forecast
aa= AutoArima()
aa.adf_test()
Thanks a lot.
Solved! Go to Solution.
OK, @Anonymous glad you got the module stuff solved! 🙂
When I used to troubleshoot this kind of thing in R, I would always test my R code as best I could in actual R Studio to make certain it ran and did not have bugs before trying to use it in Power BI. This way I could be certain whether or not the problem was with my code or with Power BI. Have you done this with your Python code?
The reason I ask is that this forum is going to have lots of people that know Power BI but not necessarily know Python so if the problem is with the Python code, it would be better to find a Python experts forum.
If you are in the Desktop, this generally means that Power BI cannot find the stated module in the correct file folder. Check your File | Options and settings | Options and then Python to see where Power BI is expecting these to be.
@Greg_Deckler By the way I've tried to uninstall the Power BI desktop and install again but it didn't work too. Also I've deleted my pycharm project and created again but still I'm getting the same error.
You can find attached the error.
Hello again,
The problem is solved because there is a huge misunderstanding, I've installed matplotlib and solved.
Now I'm writing my script on the Power BI python script editor, like below:
But I'm getting below error:
Feedback Type:
Frown (Error)
Timestamp:
2020-04-07T08:01:05.4941627Z
Local Time:
2020-04-07T11:01:05.4941627+03:00
Session ID:
2703b294-7db1-4945-a8ce-0f8cfb6fcd69
Release:
March 2020
Product Version:
2.79.5768.1082 (20.03) (x64)
Error Message:
Python script error.
Traceback (most recent call last):
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "PythonScriptWrapper.PY", line 20, in <module>
dataset['month']=pd.DatetimeIndex(dataset['CreatedDate']).month
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
Stack Trace:
Microsoft.PowerBI.ExploreServiceCommon.ScriptHandlerException: Python script error.
Traceback (most recent call last):
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "PythonScriptWrapper.PY", line 20, in <module>
dataset['month']=pd.DatetimeIndex(dataset['CreatedDate']).month
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
---> Microsoft.PowerBI.Scripting.Python.Exceptions.PythonScriptRuntimeException: Python script error.
Traceback (most recent call last):
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2646, in get_loc
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "PythonScriptWrapper.PY", line 20, in <module>
dataset['month']=pd.DatetimeIndex(dataset['CreatedDate']).month
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\frame.py", line 2800, in __getitem__
indexer = self.columns.get_loc(key)
File "C:\Users\Beyza\PycharmProjects\untitled4\venv\lib\site-packages\pandas\core\indexes\base.py", line 2648, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 111, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
File "pandas\_libs\hashtable_class_helper.pxi", line 1619, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas\_libs\hashtable_class_helper.pxi", line 1627, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'CreatedDate'
at Microsoft.PowerBI.Scripting.Python.PythonScriptWrapper.RunScript(String originalScript, Int32 timeoutMs)
at Microsoft.PowerBI.Client.Windows.Python.PythonScriptHandler.GenerateVisual(ScriptHandlerOptions options)
--- End of inner exception stack trace ---
at Microsoft.PowerBI.Client.Windows.Python.PythonScriptHandler.GenerateVisual(ScriptHandlerOptions options)
at Microsoft.PowerBI.ExploreServiceCommon.ScriptVisualCommandFlow.RunInternal(Stream dataShapeResultStream, QueryBindingDescriptor& bindingDescriptor)
at Microsoft.PowerBI.ExploreServiceCommon.ScriptVisualCommandFlow.Run(Stream dataShapeResultStream, QueryBindingDescriptor& bindingDescriptor)
at Microsoft.PowerBI.ExploreHost.SemanticQuery.ExecuteSemanticQueryFlow.TransformDataShapeResult(QueryCommand transformCommand, SemanticQueryDataShapeCommand command, Stream dataShapeResultStream, QueryBindingDescriptor& bindingDescriptor)
at Microsoft.PowerBI.ExploreHost.SemanticQuery.ExecuteSemanticQueryFlow.ExecuteDataQuery(IQueryResultDataWriter queryResultDataWriter, EngineDataModel engineDataModel, DataQuery query, Int32 queryId, ServiceErrorStatusCode& serviceErrorStatusCode, CancellationToken cancelToken)
at Microsoft.PowerBI.ExploreHost.SemanticQuery.ExecuteSemanticQueryFlow.ProcessAndWriteSemanticQueryCommands(IQueryResultsWriter queryResultsWriter, IList`1 queries, HashSet`1 pendingQueriesToCancel, EngineDataModel engineDataModel)
Invocation Stack Trace:
at Microsoft.Mashup.Host.Document.ExceptionExtensions.GetCurrentInvocationStackTrace()
at Microsoft.Mashup.Client.UI.Shared.StackTraceInfo..ctor(String exceptionStackTrace, String invocationStackTrace, String exceptionMessage)
at Microsoft.PowerBI.Client.Windows.ErrorHostService.GetErrorDetails(ShowErrorDialogArgs args)
at Microsoft.PowerBI.Client.Windows.ErrorHostService.<>c__DisplayClass2_0.<<ShowErrorDialog>b__0>d.MoveNext()
at System.Runtime.CompilerServices.AsyncTaskMethodBuilder.Start[TStateMachine](TStateMachine& stateMachine)
at Microsoft.PowerBI.Client.Windows.ErrorHostService.<>c__DisplayClass2_0.<ShowErrorDialog>b__0()
at Microsoft.Mashup.Host.Document.SynchronizationContextExtensions.<>c__DisplayClass1_0`1.<SendAndMarshalExceptions>b__0()
at Microsoft.Mashup.Host.Document.SynchronizationContextExtensions.<>c__DisplayClass0_1.<SendAndMarshalExceptions>b__0(Object null)
at System.RuntimeMethodHandle.InvokeMethod(Object target, Object[] arguments, Signature sig, Boolean constructor)
at System.Reflection.RuntimeMethodInfo.UnsafeInvokeInternal(Object obj, Object[] parameters, Object[] arguments)
at System.Delegate.DynamicInvokeImpl(Object[] args)
at System.Windows.Forms.Control.InvokeMarshaledCallbackDo(ThreadMethodEntry tme)
at System.Windows.Forms.Control.InvokeMarshaledCallbackHelper(Object obj)
at System.Threading.ExecutionContext.RunInternal(ExecutionContext executionContext, ContextCallback callback, Object state, Boolean preserveSyncCtx)
at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state, Boolean preserveSyncCtx)
at System.Threading.ExecutionContext.Run(ExecutionContext executionContext, ContextCallback callback, Object state)
at System.Windows.Forms.Control.InvokeMarshaledCallback(ThreadMethodEntry tme)
at System.Windows.Forms.Control.InvokeMarshaledCallbacks()
at System.Windows.Forms.Control.WndProc(Message& m)
at System.Windows.Forms.NativeWindow.Callback(IntPtr hWnd, Int32 msg, IntPtr wparam, IntPtr lparam)
at System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
at System.Windows.Forms.UnsafeNativeMethods.DispatchMessageW(MSG& msg)
at System.Windows.Forms.Application.ComponentManager.System.Windows.Forms.UnsafeNativeMethods.IMsoComponentManager.FPushMessageLoop(IntPtr dwComponentID, Int32 reason, Int32 pvLoopData)
at System.Windows.Forms.Application.ThreadContext.RunMessageLoopInner(Int32 reason, ApplicationContext context)
at System.Windows.Forms.Application.ThreadContext.RunMessageLoop(Int32 reason, ApplicationContext context)
at System.Windows.Forms.Form.ShowDialog(IWin32Window owner)
at Microsoft.Mashup.Client.UI.Shared.WebDialogs.WebDialog.<>n__0(IWindowHandle owner)
at Microsoft.Mashup.Client.UI.Shared.WindowManager.ShowModal[T](T dialog, Func`1 showModalFunction)
at Microsoft.PowerBI.Client.Program.<>c__DisplayClass2_0.<Main>b__1()
at Microsoft.PowerBI.Client.Windows.IExceptionHandlerExtensions.<>c__DisplayClass3_0.<HandleExceptionsWithNestedTasks>b__0()
at Microsoft.Mashup.Host.Document.ExceptionHandlerExtensions.HandleExceptions(IExceptionHandler exceptionHandler, Action action)
at Microsoft.PowerBI.Client.Program.Main(String[] args)
OS Version:
Microsoft Windows NT 10.0.18362.0 (x64 en-US)
CLR Version:
4.7 or later [Release Number = 528040]
Peak Virtual Memory:
38.7 GB
Private Memory:
573 MB
Peak Working Set:
625 MB
IE Version:
11.719.18362.0
User ID:
dd7acbbc-799c-4a9c-a94c-115b80ade3be
Workbook Package Info:
1* - en-GB, Query Groups: 0, fastCombine: Disabled, runBackgroundAnalysis: True.
Telemetry Enabled:
True
Snapshot Trace Logs:
C:\Users\Beyza\AppData\Local\Microsoft\Power BI Desktop\FrownSnapShot1255098905.zip
Model Default Mode:
Import
Model Version:
PowerBI_V1
Is Report V3 Models Enabled:
False
Performance Trace Logs:
C:\Users\Beyza\AppData\Local\Microsoft\Power BI Desktop\PerformanceTraces.zip
Enabled Preview Features:
PBI_showMinervaRibbon
PBI_showMinervaViewNavigator
Disabled Preview Features:
PBI_shapeMapVisualEnabled
PBI_SpanishLinguisticsEnabled
PBI_NewWebTableInference
PBI_qnaLiveConnect
PQ_AIInsightsFunctionBrowser
PBI_realTimeRefresh
PBI_v3Models
PBI_decompositionTree
PBI_themeDialog
PBI_hierarchicalSlicerCreation
Disabled DirectQuery Options:
TreatHanaAsRelationalSource
Cloud:
GlobalCloud
DPI Scale:
150%
Supported Services:
Power BI
Formulas:
section Section1;
shared df = let
Source = Python.Execute("import pandas as pd#(lf)df=pd.read_csv('C:/Users/Beyza/Desktop/workload_filtered_csv.csv')#(lf)print(df)"),
df1 = Source{[Name="df"]}[Value],
#"Changed Type" = Table.TransformColumnTypes(df1,{{"ProjectName", type text}, {"CreatedDate", type datetime}, {"EstimateManDay2", type number}, {"EstimateManDay", type number}, {"MegaProject", type text}})
in
#"Changed Type";
OK, @Anonymous glad you got the module stuff solved! 🙂
When I used to troubleshoot this kind of thing in R, I would always test my R code as best I could in actual R Studio to make certain it ran and did not have bugs before trying to use it in Power BI. This way I could be certain whether or not the problem was with my code or with Power BI. Have you done this with your Python code?
The reason I ask is that this forum is going to have lots of people that know Power BI but not necessarily know Python so if the problem is with the Python code, it would be better to find a Python experts forum.
Dear @Greg_Deckler you're hundred percent right. I have tried on my python ide then I've connected with powerBI, it worked.
Now I'm working on my python code 🙂
Thank you very much for your reply and help.
Best regards.
Dear @Greg_Deckler thanks for your return.
But main problem focus is changed, because first I've decided to run a simple python script but it does not work too.
I'm just trying to get data with pandas read_csv module, but I'm getting "There is no module as matplotlib" I have no idea why?
Covering the world! 9:00-10:30 AM Sydney, 4:00-5:30 PM CET (Paris/Berlin), 7:00-8:30 PM Mexico City
Check out the April 2024 Power BI update to learn about new features.
User | Count |
---|---|
113 | |
97 | |
80 | |
69 | |
59 |
User | Count |
---|---|
150 | |
119 | |
104 | |
87 | |
67 |