First I'd like to describe idea of what I am trying to do. I have "jobstat_jobs" table where I store the information about computing job perfomance. I am trying to compose 2 queries: 1) jobs grouped by project 2) jobs grouped by project and state. Then these queries are inner joined and I want to display share of jobs of each state among all jobs. I implemented it using ActiveRecord and raw sql, but I can't do it with arel. I get the "stack level too deep" on the "joined.to_sql" line.
members = Core::Member.arel_table
jobs = Perf::Job.arel_table
cool_relation = jobs.where(jobs[:state].not_in(%w[COMPLETETED RUNNING unknown]))
relation = cool_relation.join( Arel::Nodes::SqlLiteral.new <<-SQL
INNER JOIN core_members ON core_members.login = jobstat_jobs.login
SQL
).join(Arel::Nodes::SqlLiteral.new <<-SQL
RIGHT JOIN sessions_projects_in_sessions ON
sessions_projects_in_sessions.project_id = core_members.project_id
SQL
).group(members[:project_id]).project(members[:project_id].as('id'))
hours = '(extract(epoch from (end_time - start_time))/ 3600)'
selections = {
node_hours: "(sum((#{hours})*num_nodes))",
jobs: "count(jobstat_jobs.id)"
}
selections.each do |key, value|
relation = relation.project(
Arel::Nodes::SqlLiteral.new(value).as(key.to_s)
)
end
state_relation = relation.project(jobs[:state].as('state'))
.group(jobs[:state])
s = state_relation.as('s')
pp ActiveRecord::Base.connection.exec_query(state_relation.to_sql).to_a
joined = relation.join(s)
.on(jobs[:id].eq(s[:id]))
.project(s[:id], s[:state])
puts joined.to_sql
joined
I noticed the strange thing. When I replace "joined = relation" with "jobs.where(jobs[:state].not_in(%w[COMPLETETED RUNNING unknown]))" it works. But when I replace "joined = relation" with "joined = cool_relation" it doesn't work and I get "stack level too deep" (these 2 replacements are almost the same).
Arel v 9.0.0, Postgresql
Now that I understand the desired output here is how I would go about this
class Report
JOB_STATS = Arel::Table.new('jobstat_jobs')
CORE_MEMBERS = Arel::Table.new('core_members')
SESSIONS = Arel::Table.new('sessions_projects_in_sessions')
def additions
# This could be ported too if I knew the tables for end_time, start_time, and num_nodes
{
node_hours: Arel.sql("((extract(epoch from (end_time - start_time))/ 3600))*num_nodes").sum,
jobs: JOB_STATS[:id].count
}
end
def n
@n ||= _base_query.as('n')
end
def s
@s ||= _base_query
.project(JOB_STATS[:state])
.group(JOB_STATS[:state]).as('s')
end
def alias_columns
additions.keys.flat_map do |key|
[s[key].as("s_#{key}"),
n[key].as("n_#{key}"),
(s[key] / n[key]).as("share_#{key}")]
end
end
def query
Arel::SelectManager.new.project(
s[:project_id].as('id'),
s[:state],
*alias_columns
)
.from(s)
.join(n).on(s[:project_id].eq(n[:project_id]))
end
def to_sql
query.to_sql
end
private
def cast_as_decimal(value,alias_name:)
Arel::Nodes::NamedFunction.new(
"CAST",
[Arel::Nodes::As.new(value, Arel.sql('DECIMAL'))]
).as(alias_name.to_s)
end
def _base_query
JOB_STATS
.project(
CORE_MEMBERS[:project_id],
*additions.map {|k,v| cast_as_decimal(v, alias_name: k)})
.join(CORE_MEMBERS).on(CORE_MEMBERS[:login].eq(JOB_STATS[:login]))
.outer_join(SESSIONS).on(SESSIONS[:project_id].eq(CORE_MEMBERS[:project_id]))
.where(JOB_STATS[:state].not_in(['COMPLETETED', 'RUNNING', 'unknown']))
.group(CORE_MEMBERS[:project_id])
end
end
Result of Report.new.to_sql
SELECT
s."project_id" AS id,
s."state",
s."node_hours" AS s_node_hours,
n."node_hours" AS n_node_hours,
s."node_hours" / n."node_hours" AS share_node_hours,
s."jobs" AS s_jobs,
n."jobs" AS n_jobs,
s."jobs" / n."jobs" AS share_jobs
FROM
(
SELECT
"core_members"."project_id",
CAST(SUM(((extract(epoch from (end_time - start_time))/ 3600))*num_nodes) AS DECIMAL) AS node_hours,
CAST(COUNT("jobstat_jobs"."id") AS DECIMAL) AS jobs,
"jobstat_jobs"."state"
FROM
"jobstat_jobs"
INNER JOIN "core_members" ON "core_members"."login" = "jobstat_jobs"."login"
LEFT OUTER JOIN "sessions_projects_in_sessions" ON "sessions_projects_in_sessions"."project_id" = "core_members"."project_id"
WHERE
"jobstat_jobs"."state" NOT IN (N'COMPLETETED', N'RUNNING', N'unknown')
GROUP BY
"core_members"."project_id",
"jobstat_jobs"."state"
) s
INNER JOIN (
SELECT
"core_members"."project_id",
CAST(SUM(((extract(epoch from (end_time - start_time))/ 3600))*num_nodes) AS DECIMAL) AS node_hours,
CAST(COUNT("jobstat_jobs"."id") AS DECIMAL) AS jobs
FROM
"jobstat_jobs"
INNER JOIN "core_members" ON "core_members"."login" = "jobstat_jobs"."login"
LEFT OUTER JOIN "sessions_projects_in_sessions" ON "sessions_projects_in_sessions"."project_id" = "core_members"."project_id"
WHERE
"jobstat_jobs"."state" NOT IN (N'COMPLETETED', N'RUNNING', N'unknown')
GROUP BY
"core_members"."project_id"
) n ON s."project_id" = n."project_id"
This will also allow you further filter the resulting query like so:
rpt = Report.new
q = rpt.query.where(rpt.n[:jobs].gt(12))
q.to_sql
#=> "...same as above...WHERE n.\"jobs\" > 12"