I'm trying to test Pyspark with Snowflake with below python code :
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *
mySparkContext = SparkContext("local", "snowtest_spark")
mySparkContext.setLogLevel("ERROR")
spark = SQLContext(mySparkContext )
spark_conf = SparkConf().setMaster('local')
spark = SparkSession.builder \
.master("local") \
.appName("snowtest_spark") \
.config('spark.jars','G:\\ABHISHEK\\Snowflake\\Jars\\snowflake-jdbc-3.20.0.jar,G:\\ABHISHEK\\Snowflake\\Jars\\spark-snowflake_2.12-3.0.0.jar') \
.getOrCreate()
sfparams = { "sfURL" : "XXXX-YYYY.snowflakecomputing", "sfUser" : "AAAAA", "sfPassword" : "XXXXX", "sfDatabase" : "ABHI_DB", "sfSchema" : "ABHI_SCHEMA", "sfWarehouse" : "compute_wh" }
SNOWFLAKE_SOURCE_NAME = "net.snowflake.spark.snowflake"
#read full table
df1 = spark.read.format(SNOWFLAKE_SOURCE_NAME) \
.options(**sfparams) \
.option("dbtable", "ABHI_TABLE") \
.load()
df1.show()
I'm getting below error in Pycharm (windows env) :
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/11/21 12:01:16 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
G:\ABHISHEK\Snowflake\Jars\spark\spark-3.5.3-bin-hadoop3\python\pyspark\sql\context.py:113: FutureWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.
warnings.warn(
Traceback (most recent call last):
File "C:\Users\Abhishek\PycharmProjects\pythonProject2\.venv\snowtest_spark.py", line 41, in <module>
.load()
^^^^^^
File "G:\ABHISHEK\Snowflake\Jars\spark\spark-3.5.3-bin-hadoop3\python\pyspark\sql\readwriter.py", line 314, in load
return self._df(self._jreader.load())
^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Abhishek\PycharmProjects\pythonProject2\.venv\Lib\site-packages\py4j\java_gateway.py", line 1322, in __call__
return_value = get_return_value(
^^^^^^^^^^^^^^^^^
File "G:\ABHISHEK\Snowflake\Jars\spark\spark-3.5.3-bin-hadoop3\python\pyspark\errors\exceptions\captured.py", line 179, in deco
return f(*a, **kw)
^^^^^^^^^^^
File "C:\Users\Abhishek\PycharmProjects\pythonProject2\.venv\Lib\site-packages\py4j\protocol.py", line 326, in get_return_value
raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling o41.load.
: .apache.spark.SparkClassNotFoundException: [DATA_SOURCE_NOT_FOUND] Failed to find the data source: net.snowflake.spark.snowflake. Please find packages at `.html`.
at .apache.spark.sql.errors.QueryExecutionErrors$.dataSourceNotFoundError(QueryExecutionErrors.scala:725)
at .apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:647)
at .apache.spark.sql.execution.datasources.DataSource$.lookupDataSourceV2(DataSource.scala:697)
at .apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:208)
at .apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:172)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
at py4j.Gateway.invoke(Gateway.java:282)
at py4jmands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4jmands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.lang.Thread.run(Unknown Source)
Caused by: java.lang.ClassNotFoundException: net.snowflake.spark.snowflake.DefaultSource
at java.URLClassLoader.findClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at java.lang.ClassLoader.loadClass(Unknown Source)
at .apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$5(DataSource.scala:633)
at scala.util.Try$.apply(Try.scala:213)
at .apache.spark.sql.execution.datasources.DataSource$.$anonfun$lookupDataSource$4(DataSource.scala:633)
at scala.util.Failure.orElse(Try.scala:224)
at .apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:633)
... 15 more
Process finished with exit code 1
After reading various forums it looks like its a version issue with jars . I'm using the below jar versions :
snowflake-jdbc-3.20.0.jar
spark-snowflake_2.12-3.0.0.jar
spark-3.5.3-bin-hadoop3
Can anyone please suggest the jar versions in a working setup for Pycharm running Python 3.12.0 ?
发布者:admin,转转请注明出处:http://www.yc00.com/questions/1742306736a4419089.html
评论列表(0条)